Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/Android.mk                        |    6
-rw-r--r-- | compiler/compilers.cc                      |    3
-rw-r--r-- | compiler/dex/quick/arm64/arm64_lir.h       |  755
-rw-r--r-- | compiler/dex/quick/arm64/assemble_arm64.cc | 2055
-rw-r--r-- | compiler/dex/quick/arm64/call_arm64.cc     |  329
-rw-r--r-- | compiler/dex/quick/arm64/codegen_arm64.h   |   49
-rw-r--r-- | compiler/dex/quick/arm64/create.sh         |   19
-rw-r--r-- | compiler/dex/quick/arm64/fp_arm64.cc       |  168
-rw-r--r-- | compiler/dex/quick/arm64/int_arm64.cc      |  821
-rw-r--r-- | compiler/dex/quick/arm64/target_arm64.cc   |  771
-rw-r--r-- | compiler/dex/quick/arm64/utility_arm64.cc  | 1328
-rw-r--r-- | compiler/dex/quick/codegen_util.cc         |    1
-rw-r--r-- | compiler/dex/quick/gen_invoke.cc           |    2
-rw-r--r-- | compiler/dex/quick/mir_to_lir.h            |    7
14 files changed, 2584 insertions, 3730 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index a993251fcf..1b70d59def 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -27,6 +27,12 @@ LIBART_COMPILER_SRC_FILES := \ dex/quick/arm/int_arm.cc \ dex/quick/arm/target_arm.cc \ dex/quick/arm/utility_arm.cc \ + dex/quick/arm64/assemble_arm64.cc \ + dex/quick/arm64/call_arm64.cc \ + dex/quick/arm64/fp_arm64.cc \ + dex/quick/arm64/int_arm64.cc \ + dex/quick/arm64/target_arm64.cc \ + dex/quick/arm64/utility_arm64.cc \ dex/quick/codegen_util.cc \ dex/quick/dex_file_method_inliner.cc \ dex/quick/dex_file_to_method_inliner_map.cc \ diff --git a/compiler/compilers.cc b/compiler/compilers.cc index 6bf0058196..79a85db79a 100644 --- a/compiler/compilers.cc +++ b/compiler/compilers.cc @@ -102,8 +102,7 @@ Backend* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_ mir_to_lir = ArmCodeGenerator(cu, cu->mir_graph.get(), &cu->arena); break; case kArm64: - // TODO(Arm64): replace the generator below with a proper one. - mir_to_lir = ArmCodeGenerator(cu, cu->mir_graph.get(), &cu->arena); + mir_to_lir = Arm64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena); break; case kMips: mir_to_lir = MipsCodeGenerator(cu, cu->mir_graph.get(), &cu->arena); diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index c6d6295812..452c8d703b 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -22,6 +22,8 @@ namespace art { /* + * TODO(Arm64): the comments below are outdated. + * * Runtime register usage conventions. * * r0-r3: Argument registers in both Dalvik and C/C++ conventions. @@ -29,12 +31,12 @@ namespace art { * pointer in r0 as a hidden arg0. Otherwise used as codegen scratch * registers. * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit - * r4 : (rARM_SUSPEND) is reserved (suspend check/debugger assist) + * r4 : (rA64_SUSPEND) is reserved (suspend check/debugger assist) * r5 : Callee save (promotion target) * r6 : Callee save (promotion target) * r7 : Callee save (promotion target) * r8 : Callee save (promotion target) - * r9 : (rARM_SELF) is reserved (pointer to thread-local storage) + * r9 : (rA64_SELF) is reserved (pointer to thread-local storage) * r10 : Callee save (promotion target) * r11 : Callee save (promotion target) * r12 : Scratch, may be trashed by linkage stubs @@ -93,453 +95,285 @@ namespace art { * +========================+ */ +#if 1 +#define A64_PTR_SIZE 4 +#define A64_GET_INT_OFFS(offs) ((offs).Int32Value()) +#else +// Not yet ready for this. +#define A64_PTR_SIZE 8 +#define A64_GET_INT_OFFS(offs) ((offs).Int32Value()) +#endif + +#define A64_QUICK_ENTRYPOINT_OFFSET(name) QUICK_ENTRYPOINT_OFFSET(A64_PTR_SIZE, name) +#define A64_QUICK_ENTRYPOINT_INT_OFFS(name) A64_GET_INT_OFFS(A64_QUICK_ENTRYPOINT_OFFSET(name)) +#define A64_THREAD_THIN_LOCK_ID_OFFSET A64_GET_INT_OFFS(Thread::ThinLockIdOffset<A64_PTR_SIZE>()) +#define A64_THREAD_EXCEPTION_INT_OFFS A64_GET_INT_OFFS(Thread::ExceptionOffset<A64_PTR_SIZE>()) +#define A64_THREAD_CARD_TABLE_INT_OFFS A64_GET_INT_OFFS(Thread::CardTableOffset<A64_PTR_SIZE>()) +#define A64_THREAD_STACK_END_INT_OFFS A64_GET_INT_OFFS(Thread::StackEndOffset<A64_PTR_SIZE>()) +#define A64_THREAD_SUSPEND_TRIGGER_OFFSET \ + A64_GET_INT_OFFS(Thread::ThreadSuspendTriggerOffset<A64_PTR_SIZE>()) +typedef ThreadOffset<A64_PTR_SIZE> A64ThreadOffset; + +// Offset to distinguish FP regs. +#define ARM_FP_REG_OFFSET 32 // First FP callee save. 
#define ARM_FP_CALLEE_SAVE_BASE 16 +// Mask to strip off fp flags. +#define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET - 1) + +// Temporary macros, used to mark code which wants to distinguish betweek zr/sp. +#define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp) +#define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr) + enum ArmResourceEncodingPos { kArmGPReg0 = 0, - kArmRegSP = 13, - kArmRegLR = 14, - kArmRegPC = 15, - kArmFPReg0 = 16, - kArmFPReg16 = 32, - kArmRegEnd = 48, + kArmRegLR = 30, + kArmRegSP = 31, + kArmFPReg0 = 32, + kArmRegEnd = 64, }; -#define ENCODE_ARM_REG_LIST(N) (static_cast<uint64_t>(N)) #define ENCODE_ARM_REG_SP (1ULL << kArmRegSP) #define ENCODE_ARM_REG_LR (1ULL << kArmRegLR) -#define ENCODE_ARM_REG_PC (1ULL << kArmRegPC) -#define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16) - -enum ArmNativeRegisterPool { - r0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0, - r1 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1, - r2 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2, - r3 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3, - rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4, - r5 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5, - r6 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6, - r7 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7, - r8 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8, - rARM_SELF = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9, - r10 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10, - r11 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11, - r12 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12, - r13sp = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13, - rARM_SP = r13sp, - r14lr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14, - rARM_LR = r14lr, - r15pc = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15, - rARM_PC = r15pc, - - fr0 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0, - fr1 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1, - fr2 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2, - fr3 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 3, - fr4 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 4, - fr5 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5, - fr6 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6, - fr7 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7, - fr8 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8, - fr9 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9, - fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10, - fr11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11, - fr12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12, - fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13, - fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14, - fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15, - fr16 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16, - fr17 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17, - fr18 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18, - fr19 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19, - fr20 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20, - fr21 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21, - fr22 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22, - fr23 = RegStorage::k32BitSolo | 
RegStorage::kFloatingPoint | 23, - fr24 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24, - fr25 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25, - fr26 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26, - fr27 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27, - fr28 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28, - fr29 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29, - fr30 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30, - fr31 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31, - - dr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0, - dr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1, - dr2 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2, - dr3 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 3, - dr4 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4, - dr5 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5, - dr6 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6, - dr7 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7, - dr8 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8, - dr9 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9, - dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10, - dr11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11, - dr12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12, - dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13, - dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14, - dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15, -#if 0 - // Enable when def/use and runtime able to handle these. - dr16 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16, - dr17 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 17, - dr18 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18, - dr19 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 19, - dr20 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20, - dr21 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 21, - dr22 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22, - dr23 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 23, - dr24 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24, - dr25 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 25, - dr26 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26, - dr27 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 27, - dr28 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28, - dr29 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 29, - dr30 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30, - dr31 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 31, -#endif + +#define IS_SIGNED_IMM(size, value) \ + ((value) >= -(1 << ((size) - 1)) && (value) < (1 << ((size) - 1))) +#define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value) +#define IS_SIGNED_IMM9(value) IS_SIGNED_IMM(9, value) +#define IS_SIGNED_IMM12(value) IS_SIGNED_IMM(12, value) +#define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value) +#define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value) + +// Quick macro used to define the registers. +#define A64_REGISTER_CODE_LIST(R) \ + R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ + R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) \ + R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \ + R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31) + +// Registers (integer) values. +// TODO(Arm64): for now we define rx##nr identically to rw##nr. 
We should rather define rx##nr as +// a k64BitSolo. We should do this once the register allocator is ready. +enum A64NativeRegisterPool { +# define A64_DEFINE_REGISTERS(nr) \ + rw##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \ + rx##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \ + rf##nr = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | nr, \ + rd##nr = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | nr, + A64_REGISTER_CODE_LIST(A64_DEFINE_REGISTERS) +#undef A64_DEFINE_REGISTERS + + // TODO(Arm64): can we change the lines below such that rwzr != rwsp && rxzr != rsp? + // This would be desirable to allow detecting usage-errors in the assembler. + rwzr = rw31, + rxzr = rx31, + rwsp = rw31, + rsp = rx31, + rA64_SUSPEND = rx4, + rA64_SELF = rx18, + rA64_SP = rx31, + rA64_LR = rx30 }; -constexpr RegStorage rs_r0(RegStorage::kValid | r0); -constexpr RegStorage rs_r1(RegStorage::kValid | r1); -constexpr RegStorage rs_r2(RegStorage::kValid | r2); -constexpr RegStorage rs_r3(RegStorage::kValid | r3); -constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND); -constexpr RegStorage rs_r5(RegStorage::kValid | r5); -constexpr RegStorage rs_r6(RegStorage::kValid | r6); -constexpr RegStorage rs_r7(RegStorage::kValid | r7); -constexpr RegStorage rs_r8(RegStorage::kValid | r8); -constexpr RegStorage rs_rARM_SELF(RegStorage::kValid | rARM_SELF); -constexpr RegStorage rs_r10(RegStorage::kValid | r10); -constexpr RegStorage rs_r11(RegStorage::kValid | r11); -constexpr RegStorage rs_r12(RegStorage::kValid | r12); -constexpr RegStorage rs_r13sp(RegStorage::kValid | r13sp); -constexpr RegStorage rs_rARM_SP(RegStorage::kValid | rARM_SP); -constexpr RegStorage rs_r14lr(RegStorage::kValid | r14lr); -constexpr RegStorage rs_rARM_LR(RegStorage::kValid | rARM_LR); -constexpr RegStorage rs_r15pc(RegStorage::kValid | r15pc); -constexpr RegStorage rs_rARM_PC(RegStorage::kValid | rARM_PC); -constexpr RegStorage rs_invalid(RegStorage::kInvalid); - -constexpr RegStorage rs_fr0(RegStorage::kValid | fr0); -constexpr RegStorage rs_fr1(RegStorage::kValid | fr1); -constexpr RegStorage rs_fr2(RegStorage::kValid | fr2); -constexpr RegStorage rs_fr3(RegStorage::kValid | fr3); -constexpr RegStorage rs_fr4(RegStorage::kValid | fr4); -constexpr RegStorage rs_fr5(RegStorage::kValid | fr5); -constexpr RegStorage rs_fr6(RegStorage::kValid | fr6); -constexpr RegStorage rs_fr7(RegStorage::kValid | fr7); -constexpr RegStorage rs_fr8(RegStorage::kValid | fr8); -constexpr RegStorage rs_fr9(RegStorage::kValid | fr9); -constexpr RegStorage rs_fr10(RegStorage::kValid | fr10); -constexpr RegStorage rs_fr11(RegStorage::kValid | fr11); -constexpr RegStorage rs_fr12(RegStorage::kValid | fr12); -constexpr RegStorage rs_fr13(RegStorage::kValid | fr13); -constexpr RegStorage rs_fr14(RegStorage::kValid | fr14); -constexpr RegStorage rs_fr15(RegStorage::kValid | fr15); -constexpr RegStorage rs_fr16(RegStorage::kValid | fr16); -constexpr RegStorage rs_fr17(RegStorage::kValid | fr17); -constexpr RegStorage rs_fr18(RegStorage::kValid | fr18); -constexpr RegStorage rs_fr19(RegStorage::kValid | fr19); -constexpr RegStorage rs_fr20(RegStorage::kValid | fr20); -constexpr RegStorage rs_fr21(RegStorage::kValid | fr21); -constexpr RegStorage rs_fr22(RegStorage::kValid | fr22); -constexpr RegStorage rs_fr23(RegStorage::kValid | fr23); -constexpr RegStorage rs_fr24(RegStorage::kValid | fr24); -constexpr RegStorage rs_fr25(RegStorage::kValid | fr25); -constexpr RegStorage rs_fr26(RegStorage::kValid | 
fr26); -constexpr RegStorage rs_fr27(RegStorage::kValid | fr27); -constexpr RegStorage rs_fr28(RegStorage::kValid | fr28); -constexpr RegStorage rs_fr29(RegStorage::kValid | fr29); -constexpr RegStorage rs_fr30(RegStorage::kValid | fr30); -constexpr RegStorage rs_fr31(RegStorage::kValid | fr31); - -constexpr RegStorage rs_dr0(RegStorage::kValid | dr0); -constexpr RegStorage rs_dr1(RegStorage::kValid | dr1); -constexpr RegStorage rs_dr2(RegStorage::kValid | dr2); -constexpr RegStorage rs_dr3(RegStorage::kValid | dr3); -constexpr RegStorage rs_dr4(RegStorage::kValid | dr4); -constexpr RegStorage rs_dr5(RegStorage::kValid | dr5); -constexpr RegStorage rs_dr6(RegStorage::kValid | dr6); -constexpr RegStorage rs_dr7(RegStorage::kValid | dr7); -constexpr RegStorage rs_dr8(RegStorage::kValid | dr8); -constexpr RegStorage rs_dr9(RegStorage::kValid | dr9); -constexpr RegStorage rs_dr10(RegStorage::kValid | dr10); -constexpr RegStorage rs_dr11(RegStorage::kValid | dr11); -constexpr RegStorage rs_dr12(RegStorage::kValid | dr12); -constexpr RegStorage rs_dr13(RegStorage::kValid | dr13); -constexpr RegStorage rs_dr14(RegStorage::kValid | dr14); -constexpr RegStorage rs_dr15(RegStorage::kValid | dr15); -#if 0 -constexpr RegStorage rs_dr16(RegStorage::kValid | dr16); -constexpr RegStorage rs_dr17(RegStorage::kValid | dr17); -constexpr RegStorage rs_dr18(RegStorage::kValid | dr18); -constexpr RegStorage rs_dr19(RegStorage::kValid | dr19); -constexpr RegStorage rs_dr20(RegStorage::kValid | dr20); -constexpr RegStorage rs_dr21(RegStorage::kValid | dr21); -constexpr RegStorage rs_dr22(RegStorage::kValid | dr22); -constexpr RegStorage rs_dr23(RegStorage::kValid | dr23); -constexpr RegStorage rs_dr24(RegStorage::kValid | dr24); -constexpr RegStorage rs_dr25(RegStorage::kValid | dr25); -constexpr RegStorage rs_dr26(RegStorage::kValid | dr26); -constexpr RegStorage rs_dr27(RegStorage::kValid | dr27); -constexpr RegStorage rs_dr28(RegStorage::kValid | dr28); -constexpr RegStorage rs_dr29(RegStorage::kValid | dr29); -constexpr RegStorage rs_dr30(RegStorage::kValid | dr30); -constexpr RegStorage rs_dr31(RegStorage::kValid | dr31); -#endif +#define A64_DEFINE_REGSTORAGES(nr) \ + constexpr RegStorage rs_w##nr(RegStorage::kValid | rw##nr); \ + constexpr RegStorage rs_x##nr(RegStorage::kValid | rx##nr); \ + constexpr RegStorage rs_f##nr(RegStorage::kValid | rf##nr); \ + constexpr RegStorage rs_d##nr(RegStorage::kValid | rd##nr); +A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES) +#undef A64_DEFINE_REGSTORAGES -// RegisterLocation templates return values (r0, or r0/r1). 
-const RegLocation arm_loc_c_return - {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, - RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_wide - {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, - RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_float - {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, - RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG}; -const RegLocation arm_loc_c_return_double - {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, - RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG}; - -enum ArmShiftEncodings { - kArmLsl = 0x0, - kArmLsr = 0x1, - kArmAsr = 0x2, - kArmRor = 0x3 +constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr); +constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr); +constexpr RegStorage rs_rA64_SUSPEND(RegStorage::kValid | rA64_SUSPEND); +constexpr RegStorage rs_rA64_SELF(RegStorage::kValid | rA64_SELF); +constexpr RegStorage rs_rA64_SP(RegStorage::kValid | rA64_SP); +constexpr RegStorage rs_rA64_LR(RegStorage::kValid | rA64_LR); + +// RegisterLocation templates return values (following the hard-float calling convention). +const RegLocation arm_loc_c_return = + {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG}; +const RegLocation arm_loc_c_return_wide = + {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG}; +const RegLocation arm_loc_c_return_float = + {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_f0, INVALID_SREG, INVALID_SREG}; +const RegLocation arm_loc_c_return_double = + {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_d0, INVALID_SREG, INVALID_SREG}; + +/** + * @brief Shift-type to be applied to a register via EncodeShift(). + */ +enum A64ShiftEncodings { + kA64Lsl = 0x0, + kA64Lsr = 0x1, + kA64Asr = 0x2, + kA64Ror = 0x3 }; +/** + * @brief Extend-type to be applied to a register via EncodeExtend(). + */ +enum A64RegExtEncodings { + kA64Uxtb = 0x0, + kA64Uxth = 0x1, + kA64Uxtw = 0x2, + kA64Uxtx = 0x3, + kA64Sxtb = 0x4, + kA64Sxth = 0x5, + kA64Sxtw = 0x6, + kA64Sxtx = 0x7 +}; + +#define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0)) + /* - * The following enum defines the list of supported Thumb instructions by the + * The following enum defines the list of supported A64 instructions by the * assembler. Their corresponding EncodingMap positions will be defined in - * Assemble.cc. + * assemble_arm64.cc. */ enum ArmOpcode { - kArmFirst = 0, - kArm16BitData = kArmFirst, // DATA [0] rd[15..0]. - kThumbAdcRR, // adc [0100000101] rm[5..3] rd[2..0]. - kThumbAddRRI3, // add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0]. - kThumbAddRI8, // add(2) [00110] rd[10..8] imm_8[7..0]. - kThumbAddRRR, // add(3) [0001100] rm[8..6] rn[5..3] rd[2..0]. - kThumbAddRRLH, // add(4) [01000100] H12[01] rm[5..3] rd[2..0]. - kThumbAddRRHL, // add(4) [01001000] H12[10] rm[5..3] rd[2..0]. - kThumbAddRRHH, // add(4) [01001100] H12[11] rm[5..3] rd[2..0]. - kThumbAddPcRel, // add(5) [10100] rd[10..8] imm_8[7..0]. - kThumbAddSpRel, // add(6) [10101] rd[10..8] imm_8[7..0]. - kThumbAddSpI7, // add(7) [101100000] imm_7[6..0]. - kThumbAndRR, // and [0100000000] rm[5..3] rd[2..0]. - kThumbAsrRRI5, // asr(1) [00010] imm_5[10..6] rm[5..3] rd[2..0]. - kThumbAsrRR, // asr(2) [0100000100] rs[5..3] rd[2..0]. - kThumbBCond, // b(1) [1101] cond[11..8] offset_8[7..0]. - kThumbBUncond, // b(2) [11100] offset_11[10..0]. - kThumbBicRR, // bic [0100001110] rm[5..3] rd[2..0]. - kThumbBkpt, // bkpt [10111110] imm_8[7..0]. 
- kThumbBlx1, // blx(1) [111] H[10] offset_11[10..0]. - kThumbBlx2, // blx(1) [111] H[01] offset_11[10..0]. - kThumbBl1, // blx(1) [111] H[10] offset_11[10..0]. - kThumbBl2, // blx(1) [111] H[11] offset_11[10..0]. - kThumbBlxR, // blx(2) [010001111] rm[6..3] [000]. - kThumbBx, // bx [010001110] H2[6..6] rm[5..3] SBZ[000]. - kThumbCmnRR, // cmn [0100001011] rm[5..3] rd[2..0]. - kThumbCmpRI8, // cmp(1) [00101] rn[10..8] imm_8[7..0]. - kThumbCmpRR, // cmp(2) [0100001010] rm[5..3] rd[2..0]. - kThumbCmpLH, // cmp(3) [01000101] H12[01] rm[5..3] rd[2..0]. - kThumbCmpHL, // cmp(3) [01000110] H12[10] rm[5..3] rd[2..0]. - kThumbCmpHH, // cmp(3) [01000111] H12[11] rm[5..3] rd[2..0]. - kThumbEorRR, // eor [0100000001] rm[5..3] rd[2..0]. - kThumbLdmia, // ldmia [11001] rn[10..8] reglist [7..0]. - kThumbLdrRRI5, // ldr(1) [01101] imm_5[10..6] rn[5..3] rd[2..0]. - kThumbLdrRRR, // ldr(2) [0101100] rm[8..6] rn[5..3] rd[2..0]. - kThumbLdrPcRel, // ldr(3) [01001] rd[10..8] imm_8[7..0]. - kThumbLdrSpRel, // ldr(4) [10011] rd[10..8] imm_8[7..0]. - kThumbLdrbRRI5, // ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0]. - kThumbLdrbRRR, // ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0]. - kThumbLdrhRRI5, // ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0]. - kThumbLdrhRRR, // ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0]. - kThumbLdrsbRRR, // ldrsb [0101011] rm[8..6] rn[5..3] rd[2..0]. - kThumbLdrshRRR, // ldrsh [0101111] rm[8..6] rn[5..3] rd[2..0]. - kThumbLslRRI5, // lsl(1) [00000] imm_5[10..6] rm[5..3] rd[2..0]. - kThumbLslRR, // lsl(2) [0100000010] rs[5..3] rd[2..0]. - kThumbLsrRRI5, // lsr(1) [00001] imm_5[10..6] rm[5..3] rd[2..0]. - kThumbLsrRR, // lsr(2) [0100000011] rs[5..3] rd[2..0]. - kThumbMovImm, // mov(1) [00100] rd[10..8] imm_8[7..0]. - kThumbMovRR, // mov(2) [0001110000] rn[5..3] rd[2..0]. - kThumbMovRR_H2H, // mov(3) [01000111] H12[11] rm[5..3] rd[2..0]. - kThumbMovRR_H2L, // mov(3) [01000110] H12[01] rm[5..3] rd[2..0]. - kThumbMovRR_L2H, // mov(3) [01000101] H12[10] rm[5..3] rd[2..0]. - kThumbMul, // mul [0100001101] rm[5..3] rd[2..0]. - kThumbMvn, // mvn [0100001111] rm[5..3] rd[2..0]. - kThumbNeg, // neg [0100001001] rm[5..3] rd[2..0]. - kThumbOrr, // orr [0100001100] rm[5..3] rd[2..0]. - kThumbPop, // pop [1011110] r[8..8] rl[7..0]. - kThumbPush, // push [1011010] r[8..8] rl[7..0]. - kThumbRev, // rev [1011101000] rm[5..3] rd[2..0] - kThumbRevsh, // revsh [1011101011] rm[5..3] rd[2..0] - kThumbRorRR, // ror [0100000111] rs[5..3] rd[2..0]. - kThumbSbc, // sbc [0100000110] rm[5..3] rd[2..0]. - kThumbStmia, // stmia [11000] rn[10..8] reglist [7.. 0]. - kThumbStrRRI5, // str(1) [01100] imm_5[10..6] rn[5..3] rd[2..0]. - kThumbStrRRR, // str(2) [0101000] rm[8..6] rn[5..3] rd[2..0]. - kThumbStrSpRel, // str(3) [10010] rd[10..8] imm_8[7..0]. - kThumbStrbRRI5, // strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0]. - kThumbStrbRRR, // strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0]. - kThumbStrhRRI5, // strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0]. - kThumbStrhRRR, // strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0]. - kThumbSubRRI3, // sub(1) [0001111] imm_3[8..6] rn[5..3] rd[2..0]*/ - kThumbSubRI8, // sub(2) [00111] rd[10..8] imm_8[7..0]. - kThumbSubRRR, // sub(3) [0001101] rm[8..6] rn[5..3] rd[2..0]. - kThumbSubSpI7, // sub(4) [101100001] imm_7[6..0]. - kThumbSwi, // swi [11011111] imm_8[7..0]. - kThumbTst, // tst [0100001000] rm[5..3] rn[2..0]. - kThumb2Vldrs, // vldr low sx [111011011001] rn[19..16] rd[15-12] [1010] imm_8[7..0]. - kThumb2Vldrd, // vldr low dx [111011011001] rn[19..16] rd[15-12] [1011] imm_8[7..0]. 
- kThumb2Vmuls, // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10100000] rm[3..0]. - kThumb2Vmuld, // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10110000] rm[3..0]. - kThumb2Vstrs, // vstr low sx [111011011000] rn[19..16] rd[15-12] [1010] imm_8[7..0]. - kThumb2Vstrd, // vstr low dx [111011011000] rn[19..16] rd[15-12] [1011] imm_8[7..0]. - kThumb2Vsubs, // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100040] rm[3..0]. - kThumb2Vsubd, // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110040] rm[3..0]. - kThumb2Vadds, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100000] rm[3..0]. - kThumb2Vaddd, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0]. - kThumb2Vdivs, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0]. - kThumb2Vdivd, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0]. - kThumb2VmlaF64, // vmla.F64 vd, vn, vm [111011100000] vn[19..16] vd[15..12] [10110000] vm[3..0]. - kThumb2VcvtIF, // vcvt.F32.S32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0]. - kThumb2VcvtFI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0]. - kThumb2VcvtDI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0]. - kThumb2VcvtFd, // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0]. - kThumb2VcvtDF, // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0]. - kThumb2VcvtF64S32, // vcvt.F64.S32 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0]. - kThumb2VcvtF64U32, // vcvt.F64.U32 vd, vm [1110111010111000] vd[15..12] [10110100] vm[3..0]. - kThumb2Vsqrts, // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0]. - kThumb2Vsqrtd, // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0]. - kThumb2MovI8M, // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8. - kThumb2MovImm16, // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8. - kThumb2StrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0]. - kThumb2LdrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0]. - kThumb2StrRRI8Predec, // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0]. - kThumb2LdrRRI8Predec, // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0]. - kThumb2Cbnz, // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0]. - kThumb2Cbz, // cbn rd,<label> [101100] i [1] imm5[7..3] rn[2..0]. - kThumb2AddRRI12, // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2MovRR, // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0]. - kThumb2Vmovs, // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] 101001] M [0] vm[3..0]. - kThumb2Vmovd, // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] 101101] M [0] vm[3..0]. - kThumb2Ldmia, // ldmia [111010001001] rn[19..16] mask[15..0]. - kThumb2Stmia, // stmia [111010001000] rn[19..16] mask[15..0]. - kThumb2AddRRR, // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2SubRRR, // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2SbcRRR, // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2CmpRR, // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0]. - kThumb2SubRRI12, // sub rd, rn, #imm12 [11110] i [101010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. 
- kThumb2MvnI8M, // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8. - kThumb2Sel, // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0]. - kThumb2Ubfx, // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0]. - kThumb2Sbfx, // ubfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0]. - kThumb2LdrRRR, // ldr rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. - kThumb2LdrhRRR, // ldrh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. - kThumb2LdrshRRR, // ldrsh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. - kThumb2LdrbRRR, // ldrb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. - kThumb2LdrsbRRR, // ldrsb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. - kThumb2StrRRR, // str rt,[rn,rm,LSL #imm] [111110000100] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. - kThumb2StrhRRR, // str rt,[rn,rm,LSL #imm] [111110000010] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. - kThumb2StrbRRR, // str rt,[rn,rm,LSL #imm] [111110000000] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. - kThumb2LdrhRRI12, // ldrh rt,[rn,#imm12] [111110001011] rt[15..12] rn[19..16] imm12[11..0]. - kThumb2LdrshRRI12, // ldrsh rt,[rn,#imm12] [111110011011] rt[15..12] rn[19..16] imm12[11..0]. - kThumb2LdrbRRI12, // ldrb rt,[rn,#imm12] [111110001001] rt[15..12] rn[19..16] imm12[11..0]. - kThumb2LdrsbRRI12, // ldrsb rt,[rn,#imm12] [111110011001] rt[15..12] rn[19..16] imm12[11..0]. - kThumb2StrhRRI12, // strh rt,[rn,#imm12] [111110001010] rt[15..12] rn[19..16] imm12[11..0]. - kThumb2StrbRRI12, // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0]. - kThumb2Pop, // pop [1110100010111101] list[15-0]*/ - kThumb2Push, // push [1110100100101101] list[15-0]*/ - kThumb2CmpRI8M, // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0]. - kThumb2CmnRI8M, // cmn rn, #<const> [11110] i [010001] rn[19-16] [0] imm3 [1111] imm8[7..0]. - kThumb2AdcRRR, // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2AndRRR, // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2BicRRR, // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2CmnRR, // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0]. - kThumb2EorRRR, // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2MulRRR, // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. - kThumb2SdivRRR, // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0]. - kThumb2UdivRRR, // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0]. - kThumb2MnvRR, // mvn [11101010011011110] rd[11-8] [0000] rm[3..0]. - kThumb2RsubRRI8M, // rsb rd, rn, #<const> [11110] i [011101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2NegRR, // actually rsub rd, rn, #0. - kThumb2OrrRRR, // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2TstRR, // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0]. - kThumb2LslRRR, // lsl [111110100000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. - kThumb2LsrRRR, // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. - kThumb2AsrRRR, // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. - kThumb2RorRRR, // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. 
- kThumb2LslRRI5, // lsl [11101010010011110] imm[14.12] rd[11..8] [00] rm[3..0]. - kThumb2LsrRRI5, // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0]. - kThumb2AsrRRI5, // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0]. - kThumb2RorRRI5, // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0]. - kThumb2BicRRI8M, // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2AndRRI8M, // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2OrrRRI8M, // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2EorRRI8M, // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2AddRRI8M, // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2AdcRRI8M, // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2SubRRI8M, // sub rd, rn, #<const> [11110] i [011011] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2SbcRRI8M, // sub rd, rn, #<const> [11110] i [010111] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. - kThumb2RevRR, // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0] - kThumb2RevshRR, // rev [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0] - kThumb2It, // it [10111111] firstcond[7-4] mask[3-0]. - kThumb2Fmstat, // fmstat [11101110111100011111101000010000]. - kThumb2Vcmpd, // vcmp [111011101] D [11011] rd[15-12] [1011] E [1] M [0] rm[3-0]. - kThumb2Vcmps, // vcmp [111011101] D [11010] rd[15-12] [1011] E [1] M [0] rm[3-0]. - kThumb2LdrPcRel12, // ldr rd,[pc,#imm12] [1111100011011111] rt[15-12] imm12[11-0]. - kThumb2BCond, // b<c> [1110] S cond[25-22] imm6[21-16] [10] J1 [0] J2 imm11[10..0]. - kThumb2Fmrs, // vmov [111011100000] vn[19-16] rt[15-12] [1010] N [0010000]. - kThumb2Fmsr, // vmov [111011100001] vn[19-16] rt[15-12] [1010] N [0010000]. - kThumb2Fmrrd, // vmov [111011000100] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0]. - kThumb2Fmdrr, // vmov [111011000101] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0]. - kThumb2Vabsd, // vabs.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0]. - kThumb2Vabss, // vabs.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0]. - kThumb2Vnegd, // vneg.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0]. - kThumb2Vnegs, // vneg.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0]. - kThumb2Vmovs_IMM8, // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0]. - kThumb2Vmovd_IMM8, // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0]. - kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0]. - kThumb2Umull, // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0]. - kThumb2Ldrex, // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0]. - kThumb2Ldrexd, // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111]. - kThumb2Strex, // strex [111010000100] rn[19-16] rt[15-12] rd[11-8] imm8[7-0]. - kThumb2Strexd, // strexd [111010001100] rn[19-16] rt[15-12] rt2[11-8] [0111] Rd[3-0]. - kThumb2Clrex, // clrex [11110011101111111000111100101111]. - kThumb2Bfi, // bfi [111100110110] rn[19-16] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0]. - kThumb2Bfc, // bfc [11110011011011110] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0]. - kThumb2Dmb, // dmb [1111001110111111100011110101] option[3-0]. 
- kThumb2LdrPcReln12, // ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12] imm12[11-0]. - kThumb2Stm, // stm <list> [111010010000] rn[19-16] 000 rl[12-0]. - kThumbUndefined, // undefined [11011110xxxxxxxx]. - kThumb2VPopCS, // vpop <list of callee save fp singles (s16+). - kThumb2VPushCS, // vpush <list callee save fp singles (s16+). - kThumb2Vldms, // vldms rd, <list>. - kThumb2Vstms, // vstms rd, <list>. - kThumb2BUncond, // b <label>. - kThumb2MovImm16H, // similar to kThumb2MovImm16, but target high hw. - kThumb2AddPCR, // Thumb2 2-operand add with hard-coded PC target. - kThumb2Adr, // Special purpose encoding of ADR for switch tables. - kThumb2MovImm16LST, // Special purpose version for switch table use. - kThumb2MovImm16HST, // Special purpose version for switch table use. - kThumb2LdmiaWB, // ldmia [111010011001[ rn[19..16] mask[15..0]. - kThumb2OrrRRRs, // orrs [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2Push1, // t3 encoding of push. - kThumb2Pop1, // t3 encoding of pop. - kThumb2RsubRRR, // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. - kThumb2Smull, // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0]. - kThumb2LdrdPcRel8, // ldrd rt, rt2, pc +-/1024. - kThumb2LdrdI8, // ldrd rt, rt2, [rn +-/1024]. - kThumb2StrdI8, // strd rt, rt2, [rn +-/1024]. - kArmLast, + kA64First = 0, + kA64Adc3rrr = kA64First, // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0]. + kA64Add4RRdT, // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0]. + kA64Add4rrro, // add [00001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0]. + kA64Adr2xd, // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0]. + kA64And3Rrl, // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. + kA64And4rrro, // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0]. + kA64Asr3rrd, // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0]. + kA64Asr3rrr, // asr alias of "sbfm arg0, arg1, arg2, {#31/#63}". + kA64B2ct, // b.cond [01010100] imm_19[23-5] [0] cond[3-0]. + kA64Blr1x, // blr [1101011000111111000000] rn[9-5] [00000]. + kA64Br1x, // br [1101011000011111000000] rn[9-5] [00000]. + kA64Brk1d, // brk [11010100001] imm_16[20-5] [00000]. + kA64B1t, // b [00010100] offset_26[25-0]. + kA64Cbnz2rt, // cbnz[00110101] imm_19[23-5] rt[4-0]. + kA64Cbz2rt, // cbz [00110100] imm_19[23-5] rt[4-0]. + kA64Cmn3Rro, // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111]. + kA64Cmn3RdT, // cmn [00110001] shift[23-22] imm_12[21-10] rn[9-5] [11111]. + kA64Cmp3Rro, // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111]. + kA64Cmp3RdT, // cmp [01110001] shift[23-22] imm_12[21-10] rn[9-5] [11111]. + kA64Csel4rrrc, // csel[s0011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0]. + kA64Csinc4rrrc, // csinc [s0011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0]. + kA64Csneg4rrrc, // csneg [s1011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0]. + kA64Dmb1B, // dmb [11010101000000110011] CRm[11-8] [10111111]. + kA64Eor3Rrl, // eor [s10100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. + kA64Eor4rrro, // eor [s1001010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0]. + kA64Extr4rrrd, // extr[s00100111N0] rm[20-16] imm_s[15-10] rn[9-5] rd[4-0]. + kA64Fabs2ff, // fabs[000111100s100000110000] rn[9-5] rd[4-0]. + kA64Fadd3fff, // fadd[000111100s1] rm[20-16] [001010] rn[9-5] rd[4-0]. + kA64Fcmp1f, // fcmp[000111100s100000001000] rn[9-5] [01000]. 
+ kA64Fcmp2ff, // fcmp[000111100s1] rm[20-16] [001000] rn[9-5] [00000]. + kA64Fcvtzs2wf, // fcvtzs [000111100s111000000000] rn[9-5] rd[4-0]. + kA64Fcvtzs2xf, // fcvtzs [100111100s111000000000] rn[9-5] rd[4-0]. + kA64Fcvt2Ss, // fcvt [0001111000100010110000] rn[9-5] rd[4-0]. + kA64Fcvt2sS, // fcvt [0001111001100010010000] rn[9-5] rd[4-0]. + kA64Fdiv3fff, // fdiv[000111100s1] rm[20-16] [000110] rn[9-5] rd[4-0]. + kA64Fmov2ff, // fmov[000111100s100000010000] rn[9-5] rd[4-0]. + kA64Fmov2fI, // fmov[000111100s1] imm_8[20-13] [10000000] rd[4-0]. + kA64Fmov2sw, // fmov[0001111000100111000000] rn[9-5] rd[4-0]. + kA64Fmov2Sx, // fmov[1001111001100111000000] rn[9-5] rd[4-0]. + kA64Fmov2ws, // fmov[0001111001101110000000] rn[9-5] rd[4-0]. + kA64Fmov2xS, // fmov[1001111001101111000000] rn[9-5] rd[4-0]. + kA64Fmul3fff, // fmul[000111100s1] rm[20-16] [000010] rn[9-5] rd[4-0]. + kA64Fneg2ff, // fneg[000111100s100001010000] rn[9-5] rd[4-0]. + kA64Frintz2ff, // frintz [000111100s100101110000] rn[9-5] rd[4-0]. + kA64Fsqrt2ff, // fsqrt[000111100s100001110000] rn[9-5] rd[4-0]. + kA64Fsub3fff, // fsub[000111100s1] rm[20-16] [001110] rn[9-5] rd[4-0]. + kA64Ldrb3wXd, // ldrb[0011100101] imm_12[21-10] rn[9-5] rt[4-0]. + kA64Ldrb3wXx, // ldrb[00111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0]. + kA64Ldrsb3rXd, // ldrsb[001110011s] imm_12[21-10] rn[9-5] rt[4-0]. + kA64Ldrsb3rXx, // ldrsb[0011 1000 1s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0]. + kA64Ldrh3wXF, // ldrh[0111100101] imm_12[21-10] rn[9-5] rt[4-0]. + kA64Ldrh4wXxd, // ldrh[01111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0]. + kA64Ldrsh3rXF, // ldrsh[011110011s] imm_12[21-10] rn[9-5] rt[4-0]. + kA64Ldrsh4rXxd, // ldrsh[011110001s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0] + kA64Ldr2fp, // ldr [0s011100] imm_19[23-5] rt[4-0]. + kA64Ldr2rp, // ldr [0s011000] imm_19[23-5] rt[4-0]. + kA64Ldr3fXD, // ldr [1s11110100] imm_12[21-10] rn[9-5] rt[4-0]. + kA64Ldr3rXD, // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0]. + kA64Ldr4fXxG, // ldr [1s111100011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0]. + kA64Ldr4rXxG, // ldr [1s111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0]. + kA64LdrPost3rXd, // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0]. + kA64Ldp4rrXD, // ldp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0]. + kA64LdpPost4rrXD, // ldp [s010100011] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0]. + kA64Ldur3fXd, // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0]. + kA64Ldur3rXd, // ldur[1s111000010] imm_9[20-12] [00] rn[9-5] rt[4-0]. + kA64Ldxr2rX, // ldxr[1s00100001011111011111] rn[9-5] rt[4-0]. + kA64Lsl3rrr, // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0]. + kA64Lsr3rrd, // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}". + kA64Lsr3rrr, // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0]. + kA64Movk3rdM, // mov [010100101] hw[22-21] imm_16[20-5] rd[4-0]. + kA64Movn3rdM, // mov [000100101] hw[22-21] imm_16[20-5] rd[4-0]. + kA64Movz3rdM, // mov [011100101] hw[22-21] imm_16[20-5] rd[4-0]. + kA64Mov2rr, // mov [00101010000] rm[20-16] [000000] [11111] rd[4-0]. + kA64Mvn2rr, // mov [00101010001] rm[20-16] [000000] [11111] rd[4-0]. + kA64Mul3rrr, // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0]. + kA64Neg3rro, // neg alias of "sub arg0, rzr, arg1, arg2". + kA64Orr3Rrl, // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. + kA64Orr4rrro, // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0]. + kA64Ret, // ret [11010110010111110000001111000000]. 
+ kA64Rev2rr, // rev [s10110101100000000001x] rn[9-5] rd[4-0]. + kA64Rev162rr, // rev16[s101101011000000000001] rn[9-5] rd[4-0]. + kA64Ror3rrr, // ror [s0011010110] rm[20-16] [001011] rn[9-5] rd[4-0]. + kA64Sbc3rrr, // sbc [s0011010000] rm[20-16] [000000] rn[9-5] rd[4-0]. + kA64Sbfm4rrdd, // sbfm[0001001100] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. + kA64Scvtf2fw, // scvtf [000111100s100010000000] rn[9-5] rd[4-0]. + kA64Scvtf2fx, // scvtf [100111100s100010000000] rn[9-5] rd[4-0]. + kA64Sdiv3rrr, // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0]. + kA64Smaddl4xwwx, // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0]. + kA64Stp4rrXD, // stp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0]. + kA64StpPost4rrXD, // stp [s010100010] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0]. + kA64StpPre4rrXD, // stp [s010100110] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0]. + kA64Str3fXD, // str [1s11110100] imm_12[21-10] rn[9-5] rt[4-0]. + kA64Str4fXxG, // str [1s111100001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0]. + kA64Str3rXD, // str [1s11100100] imm_12[21-10] rn[9-5] rt[4-0]. + kA64Str4rXxG, // str [1s111000001] rm[20-16] option[15-13] S[12-12] [10] rn[9-5] rt[4-0]. + kA64Strb3wXd, // strb[0011100100] imm_12[21-10] rn[9-5] rt[4-0]. + kA64Strb3wXx, // strb[00111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0]. + kA64Strh3wXF, // strh[0111100100] imm_12[21-10] rn[9-5] rt[4-0]. + kA64Strh4wXxd, // strh[01111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0]. + kA64StrPost3rXd, // str [1s111000000] imm_9[20-12] [01] rn[9-5] rt[4-0]. + kA64Stur3fXd, // stur[1s111100000] imm_9[20-12] [00] rn[9-5] rt[4-0]. + kA64Stur3rXd, // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0]. + kA64Stxr3wrX, // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0]. + kA64Sub4RRdT, // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0]. + kA64Sub4rrro, // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0]. + kA64Subs3rRd, // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0]. + kA64Tst3rro, // tst alias of "ands rzr, arg1, arg2, arg3". + kA64Ubfm4rrdd, // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. + kA64Last, + kA64NotWide = 0, // Flag used to select the first instruction variant. + kA64Wide = 0x1000 // Flag used to select the second instruction variant. }; +/* + * The A64 instruction set provides two variants for many instructions. For example, "mov wN, wM" + * and "mov xN, xM" or - for floating point instructions - "mov sN, sM" and "mov dN, dM". + * It definitely makes sense to exploit this symmetries of the instruction set. We do this via the + * WIDE, UNWIDE macros. For opcodes that allow it, the wide variant can be obtained by applying the + * WIDE macro to the non-wide opcode. E.g. WIDE(kA64Sub4RRdT). + */ + +// Return the wide and no-wide variants of the given opcode. +#define WIDE(op) ((ArmOpcode)((op) | kA64Wide)) +#define UNWIDE(op) ((ArmOpcode)((op) & ~kA64Wide)) + +// Whether the given opcode is wide. +#define IS_WIDE(op) (((op) & kA64Wide) != 0) + +/* + * Floating point variants. These are just aliases of the macros above which we use for floating + * point instructions, just for readibility reasons. + * TODO(Arm64): should we remove these and use the original macros? + */ +#define FWIDE WIDE +#define FUNWIDE UNWIDE +#define IS_FWIDE IS_WIDE + +#define OP_KIND_UNWIDE(opcode) (opcode) +#define OP_KIND_IS_WIDE(opcode) (false) + enum ArmOpDmbOptions { kSY = 0xf, kST = 0xe, @@ -551,40 +385,63 @@ enum ArmOpDmbOptions { // Instruction assembly field_loc kind. 
enum ArmEncodingKind { - kFmtUnused, // Unused field and marks end of formats. + // All the formats below are encoded in the same way (as a kFmtBitBlt). + // These are grouped together, for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ..."). + kFmtRegW = 0, // Word register (w) or wzr. + kFmtRegX, // Extended word register (x) or xzr. + kFmtRegR, // Register with same width as the instruction or zr. + kFmtRegWOrSp, // Word register (w) or wsp. + kFmtRegXOrSp, // Extended word register (x) or sp. + kFmtRegROrSp, // Register with same width as the instruction or sp. + kFmtRegS, // Single FP reg. + kFmtRegD, // Double FP reg. + kFmtRegF, // Single/double FP reg depending on the instruction width. kFmtBitBlt, // Bit string using end/start. - kFmtDfp, // Double FP reg. - kFmtSfp, // Single FP reg. - kFmtModImm, // Shifted 8-bit immed using [26,14..12,7..0]. - kFmtImm16, // Zero-extended immed using [26,19..16,14..12,7..0]. - kFmtImm6, // Encoded branch target using [9,7..3]0. - kFmtImm12, // Zero-extended immediate using [26,14..12,7..0]. - kFmtShift, // Shift descriptor, [14..12,7..4]. - kFmtLsb, // least significant bit using [14..12][7..6]. - kFmtBWidth, // bit-field width, encoded as width-1. - kFmtShift5, // Shift count, [14..12,7..6]. - kFmtBrOffset, // Signed extended [26,11,13,21-16,10-0]:0. - kFmtFPImm, // Encoded floating point immediate. - kFmtOff24, // 24-bit Thumb2 unconditional branch encoding. + + // Less likely formats. + kFmtUnused, // Unused field and marks end of formats. + kFmtImm21, // Sign-extended immediate using [23..5,30..29]. + kFmtShift, // Register shift, 9-bit at [23..21, 15..10].. + kFmtExtend, // Register extend, 9-bit at [23..21, 15..10]. kFmtSkip, // Unused field, but continue to next. }; -// Struct used to define the snippet positions for each Thumb opcode. +// Struct used to define the snippet positions for each A64 opcode. struct ArmEncodingMap { - uint32_t skeleton; + uint32_t wskeleton; + uint32_t xskeleton; struct { ArmEncodingKind kind; - int end; // end for kFmtBitBlt, 1-bit slice end for FP regs. - int start; // start for kFmtBitBlt, 4-bit slice end for FP regs. + int end; // end for kFmtBitBlt, 1-bit slice end for FP regs. + int start; // start for kFmtBitBlt, 4-bit slice end for FP regs. } field_loc[4]; - ArmOpcode opcode; + ArmOpcode opcode; // can be WIDE()-ned to indicate it has a wide variant. uint64_t flags; const char* name; const char* fmt; - int size; // Note: size is in bytes. + int size; // Note: size is in bytes. FixupKind fixup; }; +#if 0 +// TODO(Arm64): try the following alternative, which fits exactly in one cache line (64 bytes). +struct ArmEncodingMap { + uint32_t wskeleton; + uint32_t xskeleton; + uint64_t flags; + const char* name; + const char* fmt; + struct { + uint8_t kind; + int8_t end; // end for kFmtBitBlt, 1-bit slice end for FP regs. + int8_t start; // start for kFmtBitBlt, 4-bit slice end for FP regs. + } field_loc[4]; + uint32_t fixup; + uint32_t opcode; // can be WIDE()-ned to indicate it has a wide variant. + uint32_t padding[3]; +}; +#endif + } // namespace art #endif // ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_ diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index e79ebad00c..8accd0af93 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -20,26 +20,47 @@ namespace art { +// The macros below are exclusively used in the encoding map. + +// Most generic way of providing two variants for one instructions. 
+#define CUSTOM_VARIANTS(variant1, variant2) variant1, variant2 + +// Used for instructions which do not have a wide variant. +#define NO_VARIANTS(variant) \ + CUSTOM_VARIANTS(variant, 0) + +// Used for instructions which have a wide variant with the sf bit set to 1. +#define SF_VARIANTS(sf0_skeleton) \ + CUSTOM_VARIANTS(sf0_skeleton, (sf0_skeleton | 0x80000000)) + +// Used for instructions which have a wide variant with the size bits set to either x0 or x1. +#define SIZE_VARIANTS(sizex0_skeleton) \ + CUSTOM_VARIANTS(sizex0_skeleton, (sizex0_skeleton | 0x40000000)) + +// Used for instructions which have a wide variant with the sf and n bits set to 1. +#define SF_N_VARIANTS(sf0_n0_skeleton) \ + CUSTOM_VARIANTS(sf0_n0_skeleton, (sf0_n0_skeleton | 0x80400000)) + +// Used for FP instructions which have a single and double precision variants, with he type bits set +// to either 00 or 01. +#define FLOAT_VARIANTS(type00_skeleton) \ + CUSTOM_VARIANTS(type00_skeleton, (type00_skeleton | 0x00400000)) + /* * opcode: ArmOpcode enum - * skeleton: pre-designated bit-pattern for this opcode - * k0: key to applying ds/de - * ds: dest start bit position - * de: dest end bit position - * k1: key to applying s1s/s1e - * s1s: src1 start bit position - * s1e: src1 end bit position - * k2: key to applying s2s/s2e - * s2s: src2 start bit position - * s2e: src2 end bit position - * operands: number of operands (for sanity check purposes) + * variants: instruction skeletons supplied via CUSTOM_VARIANTS or derived macros. + * a{n}k: key to applying argument {n} \ + * a{n}s: argument {n} start bit position | n = 0, 1, 2, 3 + * a{n}e: argument {n} end bit position / + * flags: instruction attributes (used in optimization) * name: mnemonic name * fmt: for pretty-printing + * fixup: used for second-pass fixes (e.g. adresses fixups in branch instructions). */ -#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \ - k3, k3s, k3e, flags, name, fmt, size, fixup) \ - {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \ - {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup} +#define ENCODING_MAP(opcode, variants, a0k, a0s, a0e, a1k, a1s, a1e, a2k, a2s, a2e, \ + a3k, a3s, a3e, flags, name, fmt, fixup) \ + {variants, {{a0k, a0s, a0e}, {a1k, a1s, a1e}, {a2k, a2s, a2e}, \ + {a3k, a3s, a3e}}, opcode, flags, name, fmt, 4, fixup} /* Instruction dump string format keys: !pf, where "!" is the start * of the key, "p" is which numeric operand to use and "f" is the @@ -52,989 +73,475 @@ namespace art { * 3 -> operands[3] (extra) * * [f]ormats: - * h -> 4-digit hex * d -> decimal + * D -> decimal*4 or decimal*8 depending on the instruction width * E -> decimal*4 * F -> decimal*2 - * c -> branch condition (beq, bne, etc.) + * G -> ", lsl #2" or ", lsl #3" depending on the instruction width + * c -> branch condition (eq, ne, etc.) * t -> pc-relative target - * u -> 1st half of bl[x] target - * v -> 2nd half ob bl[x] target - * R -> register list + * p -> pc-relative address * s -> single precision floating point register * S -> double precision floating point register - * m -> Thumb2 modified immediate - * n -> complimented Thumb2 modified immediate - * M -> Thumb2 16-bit zero-extended immediate - * b -> 4-digit binary + * f -> single or double precision register (depending on instruction width) + * I -> 8-bit immediate floating point number + * l -> logical immediate + * M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...) 
* B -> dmb option string (sy, st, ish, ishst, nsh, hshst) * H -> operand shift - * C -> core register name - * P -> fp cs register list (base of s16) - * Q -> fp cs register list (base of s0) + * T -> register shift (either ", lsl #0" or ", lsl #12") + * e -> register extend (e.g. uxtb #1) + * o -> register shift (e.g. lsl #1) for Word registers + * w -> word (32-bit) register wn, or wzr + * W -> word (32-bit) register wn, or wsp + * x -> extended (64-bit) register xn, or xzr + * X -> extended (64-bit) register xn, or sp + * r -> register with same width as instruction, r31 -> wzr, xzr + * R -> register with same width as instruction, r31 -> wsp, sp * * [!] escape. To insert "!", use "!!" */ -/* NOTE: must be kept in sync with enum ArmOpcode from LIR.h */ -const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kArmLast] = { - ENCODING_MAP(kArm16BitData, 0x0000, - kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone), - ENCODING_MAP(kThumbAdcRR, 0x4140, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES, - "adcs", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbAddRRI3, 0x1c00, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "adds", "!0C, !1C, #!2d", 2, kFixupNone), - ENCODING_MAP(kThumbAddRI8, 0x3000, - kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, - "adds", "!0C, !0C, #!1d", 2, kFixupNone), - ENCODING_MAP(kThumbAddRRR, 0x1800, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, - "adds", "!0C, !1C, !2C", 2, kFixupNone), - ENCODING_MAP(kThumbAddRRLH, 0x4440, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, - "add", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbAddRRHL, 0x4480, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, - "add", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbAddRRHH, 0x44c0, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, - "add", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbAddPcRel, 0xa000, - kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP, - "add", "!0C, pc, #!1E", 2, kFixupLoad), - ENCODING_MAP(kThumbAddSpRel, 0xa800, - kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP, - "add", "!0C, sp, #!2E", 2, kFixupNone), - ENCODING_MAP(kThumbAddSpI7, 0xb000, - kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, - "add", "sp, #!0d*4", 2, kFixupNone), - ENCODING_MAP(kThumbAndRR, 0x4000, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "ands", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbAsrRRI5, 0x1000, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "asrs", "!0C, !1C, #!2d", 2, kFixupNone), - ENCODING_MAP(kThumbAsrRR, 0x4100, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | 
REG_DEF0_USE01 | SETS_CCODES, - "asrs", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbBCond, 0xd000, - kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, +/* NOTE: must be kept in sync with enum ArmOpcode from arm64_lir.h */ +const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { + ENCODING_MAP(WIDE(kA64Adc3rrr), SF_VARIANTS(0x1a000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "adc", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(WIDE(kA64Add4RRdT), SF_VARIANTS(0x11000000), + kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, + kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1, + "add", "!0R, !1R, #!2d!3T", kFixupNone), + ENCODING_MAP(WIDE(kA64Add4rrro), SF_VARIANTS(0x0b000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE1, + "add", "!0r, !1r, !2r!3o", kFixupNone), + // Note: adr is binary, but declared as tertiary. The third argument is used while doing the + // fixups and contains information to identify the adr label. + ENCODING_MAP(kA64Adr2xd, NO_VARIANTS(0x10000000), + kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP, + "adr", "!0x, #!1d", kFixupAdr), + ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000), + kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "and", "!0R, !1r, #!2l", kFixupNone), + ENCODING_MAP(WIDE(kA64And4rrro), SF_VARIANTS(0x0a000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "and", "!0r, !1r, !2r!3o", kFixupNone), + ENCODING_MAP(WIDE(kA64Asr3rrd), CUSTOM_VARIANTS(0x13007c00, 0x9340fc00), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "asr", "!0r, !1r, #!2d", kFixupNone), + ENCODING_MAP(WIDE(kA64Asr3rrr), SF_VARIANTS(0x1ac02800), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "asr", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(kA64B2ct, NO_VARIANTS(0x54000000), + kFmtBitBlt, 3, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES | - NEEDS_FIXUP, "b!1c", "!0t", 2, kFixupCondBranch), - ENCODING_MAP(kThumbBUncond, 0xe000, - kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, - "b", "!0t", 2, kFixupT1Branch), - ENCODING_MAP(kThumbBicRR, 0x4380, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "bics", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbBkpt, 0xbe00, - kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bkpt", "!0d", 2, kFixupNone), - ENCODING_MAP(kThumbBlx1, 0xf000, - kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | - NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1), - ENCODING_MAP(kThumbBlx2, 0xe800, - kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | - NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel), - ENCODING_MAP(kThumbBl1, 0xf000, - kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, - "bl_1", "!0u", 2, kFixupBl1), - 
ENCODING_MAP(kThumbBl2, 0xf800, - kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, - "bl_2", "!0v", 2, kFixupLabel), - ENCODING_MAP(kThumbBlxR, 0x4780, - kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + NEEDS_FIXUP, "b.!0c", "!1t", kFixupCondBranch), + ENCODING_MAP(kA64Blr1x, NO_VARIANTS(0xd63f0000), + kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR, - "blx", "!0C", 2, kFixupNone), - ENCODING_MAP(kThumbBx, 0x4700, - kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + "blr", "!0x", kFixupNone), + ENCODING_MAP(kA64Br1x, NO_VARIANTS(0xd61f0000), + kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH, + "br", "!0x", kFixupNone), + ENCODING_MAP(kA64Brk1d, NO_VARIANTS(0xd4200000), + kFmtBitBlt, 20, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bx", "!0C", 2, kFixupNone), - ENCODING_MAP(kThumbCmnRR, 0x42c0, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmn", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbCmpRI8, 0x2800, - kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES, - "cmp", "!0C, #!1d", 2, kFixupNone), - ENCODING_MAP(kThumbCmpRR, 0x4280, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbCmpLH, 0x4540, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbCmpHL, 0x4580, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbCmpHH, 0x45c0, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + "brk", "!0d", kFixupNone), + ENCODING_MAP(kA64B1t, NO_VARIANTS(0x14000000), + kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, + "b", "!0t", kFixupT1Branch), + ENCODING_MAP(WIDE(kA64Cbnz2rt), SF_VARIANTS(0x35000000), + kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP, + "cbnz", "!0r, !1t", kFixupCBxZ), + ENCODING_MAP(WIDE(kA64Cbz2rt), SF_VARIANTS(0x34000000), + kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP, + "cbz", "!0r, !1t", kFixupCBxZ), + ENCODING_MAP(WIDE(kA64Cmn3Rro), SF_VARIANTS(0x6b20001f), + kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, + "cmn", "!0R, !1r!2o", kFixupNone), + ENCODING_MAP(WIDE(kA64Cmn3RdT), SF_VARIANTS(0x3100001f), + kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, + "cmn", "!0R, #!1d!2T", kFixupNone), + ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b20001f), + kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0R, !1r!2o", kFixupNone), + ENCODING_MAP(WIDE(kA64Cmp3RdT), SF_VARIANTS(0x7100001f), + kFmtRegROrSp, 9, 5, kFmtBitBlt, 
21, 10, kFmtBitBlt, 23, 22, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, + "cmp", "!0R, #!1d!2T", kFixupNone), + ENCODING_MAP(WIDE(kA64Csel4rrrc), SF_VARIANTS(0x1a800000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES, + "csel", "!0r, !1r, !2r, !3c", kFixupNone), + ENCODING_MAP(WIDE(kA64Csinc4rrrc), SF_VARIANTS(0x1a800400), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES, + "csinc", "!0r, !1r, !2r, !3c", kFixupNone), + ENCODING_MAP(WIDE(kA64Csneg4rrrc), SF_VARIANTS(0x5a800400), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES, + "csneg", "!0r, !1r, !2r, !3c", kFixupNone), + ENCODING_MAP(kA64Dmb1B, NO_VARIANTS(0xd50330bf), + kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP, + "dmb", "#!0B", kFixupNone), + ENCODING_MAP(WIDE(kA64Eor3Rrl), SF_VARIANTS(0x52000000), + kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "eor", "!0R, !1r, #!2l", kFixupNone), + ENCODING_MAP(WIDE(kA64Eor4rrro), SF_VARIANTS(0x4a000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "eor", "!0r, !1r, !2r!3o", kFixupNone), + ENCODING_MAP(WIDE(kA64Extr4rrrd), SF_N_VARIANTS(0x13800000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE12, + "extr", "!0r, !1r, !2r, #!3d", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fabs2ff), FLOAT_VARIANTS(0x1e20c000), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP| REG_DEF0_USE1, + "fabs", "!0f, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fadd3fff), FLOAT_VARIANTS(0x1e202800), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "fadd", "!0f, !1f, !2f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fcmp1f), FLOAT_VARIANTS(0x1e202008), + kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | SETS_CCODES, + "fcmp", "!0f, #0", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fcmp2ff), FLOAT_VARIANTS(0x1e202000), + kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbEorRR, 0x4040, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "eors", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbLdmia, 0xc800, - kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, - "ldmia", "!0C!!, <!1R>", 2, kFixupNone), - ENCODING_MAP(kThumbLdrRRI5, 0x6800, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone), - ENCODING_MAP(kThumbLdrRRR, 0x5800, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldr", "!0C, [!1C, !2C]", 2, kFixupNone), - ENCODING_MAP(kThumbLdrPcRel, 0x4800, - kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC - | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad), - ENCODING_MAP(kThumbLdrSpRel, 0x9800, - 
kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP - | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone), - ENCODING_MAP(kThumbLdrbRRI5, 0x7800, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone), - ENCODING_MAP(kThumbLdrbRRR, 0x5c00, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone), - ENCODING_MAP(kThumbLdrhRRI5, 0x8800, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone), - ENCODING_MAP(kThumbLdrhRRR, 0x5a00, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone), - ENCODING_MAP(kThumbLdrsbRRR, 0x5600, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone), - ENCODING_MAP(kThumbLdrshRRR, 0x5e00, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone), - ENCODING_MAP(kThumbLslRRI5, 0x0000, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "lsls", "!0C, !1C, #!2d", 2, kFixupNone), - ENCODING_MAP(kThumbLslRR, 0x4080, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "lsls", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbLsrRRI5, 0x0800, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "lsrs", "!0C, !1C, #!2d", 2, kFixupNone), - ENCODING_MAP(kThumbLsrRR, 0x40c0, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "lsrs", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbMovImm, 0x2000, - kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0 | SETS_CCODES, - "movs", "!0C, #!1d", 2, kFixupNone), - ENCODING_MAP(kThumbMovRR, 0x1c00, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "movs", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbMovRR_H2H, 0x46c0, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + "fcmp", "!0f, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fcvtzs2wf), FLOAT_VARIANTS(0x1e380000), + kFmtRegW, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbMovRR_H2L, 0x4640, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + "fcvtzs", "!0w, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fcvtzs2xf), FLOAT_VARIANTS(0x9e380000), + kFmtRegX, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbMovRR_L2H, 0x4680, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + "fcvtzs", "!0x, !1f", kFixupNone), + ENCODING_MAP(kA64Fcvt2Ss, NO_VARIANTS(0x1e22C000), + kFmtRegD, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1, kFmtUnused, 
-1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbMul, 0x4340, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "muls", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbMvn, 0x43c0, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "mvns", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbNeg, 0x4240, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "negs", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbOrr, 0x4300, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "orrs", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbPop, 0xbc00, - kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 - | IS_LOAD, "pop", "<!0R>", 2, kFixupNone), - ENCODING_MAP(kThumbPush, 0xb400, - kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 - | IS_STORE, "push", "<!0R>", 2, kFixupNone), - ENCODING_MAP(kThumbRev, 0xba00, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE1, - "rev", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbRevsh, 0xbac0, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE1, - "rev", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbRorRR, 0x41c0, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "rors", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbSbc, 0x4180, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES, - "sbcs", "!0C, !1C", 2, kFixupNone), - ENCODING_MAP(kThumbStmia, 0xc000, - kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE, - "stmia", "!0C!!, <!1R>", 2, kFixupNone), - ENCODING_MAP(kThumbStrRRI5, 0x6000, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "str", "!0C, [!1C, #!2E]", 2, kFixupNone), - ENCODING_MAP(kThumbStrRRR, 0x5000, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, - "str", "!0C, [!1C, !2C]", 2, kFixupNone), - ENCODING_MAP(kThumbStrSpRel, 0x9000, - kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP - | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone), - ENCODING_MAP(kThumbStrbRRI5, 0x7000, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strb", "!0C, [!1C, #!2d]", 2, kFixupNone), - ENCODING_MAP(kThumbStrbRRR, 0x5400, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, - "strb", "!0C, [!1C, !2C]", 2, kFixupNone), - ENCODING_MAP(kThumbStrhRRI5, 0x8000, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strh", "!0C, [!1C, #!2F]", 2, kFixupNone), - 
ENCODING_MAP(kThumbStrhRRR, 0x5200, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, - "strh", "!0C, [!1C, !2C]", 2, kFixupNone), - ENCODING_MAP(kThumbSubRRI3, 0x1e00, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "subs", "!0C, !1C, #!2d", 2, kFixupNone), - ENCODING_MAP(kThumbSubRI8, 0x3800, - kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, - "subs", "!0C, #!1d", 2, kFixupNone), - ENCODING_MAP(kThumbSubRRR, 0x1a00, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, - "subs", "!0C, !1C, !2C", 2, kFixupNone), - ENCODING_MAP(kThumbSubSpI7, 0xb080, - kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, - "sub", "sp, #!0d*4", 2, kFixupNone), - ENCODING_MAP(kThumbSwi, 0xdf00, - kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, - "swi", "!0d", 2, kFixupNone), - ENCODING_MAP(kThumbTst, 0x4200, - kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES, - "tst", "!0C, !1C", 2, kFixupNone), - /* - * Note: The encoding map entries for vldrd and vldrs include REG_DEF_LR, even though - * these instructions don't define lr. The reason is that these instructions - * are used for loading values from the literal pool, and the displacement may be found - * to be insuffient at assembly time. In that case, we need to materialize a new base - * register - and will use lr as the temp register. This works because lr is used as - * a temp register in very limited situations, and never in conjunction with a floating - * point constant load. However, it is possible that during instruction scheduling, - * another use of lr could be moved across a vldrd/vldrs. By setting REG_DEF_LR, we - * prevent that from happening. Note that we set REG_DEF_LR on all vldrd/vldrs - even those - * not used in a pc-relative case. It is really only needed on the pc-relative loads, but - * the case we're handling is rare enough that it seemed not worth the trouble to distinguish. 
- */ - ENCODING_MAP(kThumb2Vldrs, 0xed900a00, - kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | - REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad), - ENCODING_MAP(kThumb2Vldrd, 0xed900b00, - kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | - REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad), - ENCODING_MAP(kThumb2Vmuls, 0xee200a00, - kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE12, - "vmuls", "!0s, !1s, !2s", 4, kFixupNone), - ENCODING_MAP(kThumb2Vmuld, 0xee200b00, - kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vmuld", "!0S, !1S, !2S", 4, kFixupNone), - ENCODING_MAP(kThumb2Vstrs, 0xed800a00, - kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone), - ENCODING_MAP(kThumb2Vstrd, 0xed800b00, - kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone), - ENCODING_MAP(kThumb2Vsubs, 0xee300a40, - kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vsub", "!0s, !1s, !2s", 4, kFixupNone), - ENCODING_MAP(kThumb2Vsubd, 0xee300b40, - kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vsub", "!0S, !1S, !2S", 4, kFixupNone), - ENCODING_MAP(kThumb2Vadds, 0xee300a00, - kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vadd", "!0s, !1s, !2s", 4, kFixupNone), - ENCODING_MAP(kThumb2Vaddd, 0xee300b00, - kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vadd", "!0S, !1S, !2S", 4, kFixupNone), - ENCODING_MAP(kThumb2Vdivs, 0xee800a00, - kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vdivs", "!0s, !1s, !2s", 4, kFixupNone), - ENCODING_MAP(kThumb2Vdivd, 0xee800b00, - kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vdivd", "!0S, !1S, !2S", 4, kFixupNone), - ENCODING_MAP(kThumb2VmlaF64, 0xee000b00, - kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE012, - "vmla", "!0S, !1S, !2S", 4, kFixupNone), - ENCODING_MAP(kThumb2VcvtIF, 0xeeb80ac0, - kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + "fcvt", "!0S, !1s", kFixupNone), + ENCODING_MAP(kA64Fcvt2sS, NO_VARIANTS(0x1e624000), + kFmtRegS, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f32.s32", "!0s, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2VcvtFI, 0xeebd0ac0, - kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + "fcvt", "!0s, !1S", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fdiv3fff), FLOAT_VARIANTS(0x1e201800), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "fdiv", "!0f, !1f, !2f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2VcvtDI, 0xeebd0bc0, - kFmtSfp, 22, 12, 
kFmtDfp, 5, 0, kFmtUnused, -1, -1, + "fmov", "!0f, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000), + kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "fmov", "!0f, #!1I", kFixupNone), + ENCODING_MAP(kA64Fmov2sw, NO_VARIANTS(0x1e270000), + kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone), - ENCODING_MAP(kThumb2VcvtFd, 0xeeb70ac0, - kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + "fmov", "!0s, !1w", kFixupNone), + ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e6f0000), + kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2VcvtDF, 0xeeb70bc0, - kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + "fmov", "!0S, !1x", kFixupNone), + ENCODING_MAP(kA64Fmov2ws, NO_VARIANTS(0x1e260000), + kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone), - ENCODING_MAP(kThumb2VcvtF64S32, 0xeeb80bc0, - kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + "fmov", "!0w, !1s", kFixupNone), + ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e6e0000), + kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f64.s32 ", "!0S, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2VcvtF64U32, 0xeeb80b40, - kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + "fmov", "!0x, !1S", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fmul3fff), FLOAT_VARIANTS(0x1e200800), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "fmul", "!0f, !1f, !2f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fneg2ff), FLOAT_VARIANTS(0x1e214000), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f64.u32 ", "!0S, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2Vsqrts, 0xeeb10ac0, - kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + "fneg", "!0f, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Frintz2ff), FLOAT_VARIANTS(0x1e25c000), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2Vsqrtd, 0xeeb10bc0, - kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + "frintz", "!0f, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fsqrt2ff), FLOAT_VARIANTS(0x1e61c000), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone), - ENCODING_MAP(kThumb2MovI8M, 0xf04f0000, /* no setflags encoding */ - kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "mov", "!0C, #!1m", 4, kFixupNone), - ENCODING_MAP(kThumb2MovImm16, 0xf2400000, - kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "mov", "!0C, #!1M", 4, kFixupNone), - ENCODING_MAP(kThumb2StrRRI12, 0xf8c00000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "str", "!0C, [!1C, #!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrRRI12, 0xf8d00000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + "fsqrt", "!0f, !1f", kFixupNone), + ENCODING_MAP(FWIDE(kA64Fsub3fff), 
FLOAT_VARIANTS(0x1e203800), + kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "fsub", "!0f, !1f, !2f", kFixupNone), + ENCODING_MAP(kA64Ldrb3wXd, NO_VARIANTS(0x39400000), + kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2StrRRI8Predec, 0xf8400c00, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "str", "!0C, [!1C, #-!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrRRI8Predec, 0xf8500c00, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, + "ldrb", "!0w, [!1X, #!2d]", kFixupNone), + ENCODING_MAP(kA64Ldrb3wXx, NO_VARIANTS(0x38606800), + kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrb", "!0w, [!1X, !2x]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldrsb3rXd), CUSTOM_VARIANTS(0x39c00000, 0x39800000), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2Cbnz, 0xb900, /* Note: does not affect flags */ - kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | - NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ), - ENCODING_MAP(kThumb2Cbz, 0xb100, /* Note: does not affect flags */ - kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | - NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ), - ENCODING_MAP(kThumb2AddRRI12, 0xf2000000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ - "add", "!0C,!1C,#!2d", 4, kFixupNone), - ENCODING_MAP(kThumb2MovRR, 0xea4f0000, /* no setflags encoding */ - kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 4, kFixupNone), - ENCODING_MAP(kThumb2Vmovs, 0xeeb00a40, - kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f32 ", " !0s, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2Vmovd, 0xeeb00b40, - kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f64 ", " !0S, !1S", 4, kFixupNone), - ENCODING_MAP(kThumb2Ldmia, 0xe8900000, - kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, - "ldmia", "!0C!!, <!1R>", 4, kFixupNone), - ENCODING_MAP(kThumb2Stmia, 0xe8800000, - kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE, - "stmia", "!0C!!, <!1R>", 4, kFixupNone), - ENCODING_MAP(kThumb2AddRRR, 0xeb100000, /* setflags encoding */ - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, - IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "adds", "!0C, !1C, !2C!3H", 4, kFixupNone), - ENCODING_MAP(kThumb2SubRRR, 0xebb00000, /* setflags enconding */ - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, - IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "subs", "!0C, !1C, !2C!3H", 4, kFixupNone), - ENCODING_MAP(kThumb2SbcRRR, 0xeb700000, /* setflags encoding */ - kFmtBitBlt, 11, 8, kFmtBitBlt, 
19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, - IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES, - "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone), - ENCODING_MAP(kThumb2CmpRR, 0xebb00f00, - kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 4, kFixupNone), - ENCODING_MAP(kThumb2SubRRI12, 0xf2a00000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ - "sub", "!0C,!1C,#!2d", 4, kFixupNone), - ENCODING_MAP(kThumb2MvnI8M, 0xf06f0000, /* no setflags encoding */ - kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "mvn", "!0C, #!1n", 4, kFixupNone), - ENCODING_MAP(kThumb2Sel, 0xfaa0f080, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES, - "sel", "!0C, !1C, !2C", 4, kFixupNone), - ENCODING_MAP(kThumb2Ubfx, 0xf3c00000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, - kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, - "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone), - ENCODING_MAP(kThumb2Sbfx, 0xf3400000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, - kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, - "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrRRR, 0xf8500000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrhRRR, 0xf8300000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrshRRR, 0xf9300000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrbRRR, 0xf8100000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrsbRRR, 0xf9100000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), - ENCODING_MAP(kThumb2StrRRR, 0xf8400000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, - "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), - ENCODING_MAP(kThumb2StrhRRR, 0xf8200000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, - "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), - ENCODING_MAP(kThumb2StrbRRR, 0xf8000000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, - "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrhRRI12, 0xf8b00000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + "ldrsb", "!0r, [!1X, #!2d]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldrsb3rXx), CUSTOM_VARIANTS(0x38e06800, 0x38a06800), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsb", "!0r, [!1X, !2x]", kFixupNone), + ENCODING_MAP(kA64Ldrh3wXF, 
NO_VARIANTS(0x79400000), + kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrshRRI12, 0xf9b00000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + "ldrh", "!0w, [!1X, #!2F]", kFixupNone), + ENCODING_MAP(kA64Ldrh4wXxd, NO_VARIANTS(0x78606800), + kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldrsh3rXF), CUSTOM_VARIANTS(0x79c00000, 0x79800000), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrbRRI12, 0xf8900000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + "ldrsh", "!0r, [!1X, #!2F]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldrsh4rXxd), CUSTOM_VARIANTS(0x78e06800, 0x78906800), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone), + ENCODING_MAP(FWIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000), + kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, + "ldr", "!0f, !1p", kFixupLoad), + ENCODING_MAP(WIDE(kA64Ldr2rp), SIZE_VARIANTS(0x18000000), + kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, + "ldr", "!0r, !1p", kFixupLoad), + ENCODING_MAP(FWIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000), + kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrsbRRI12, 0xf9900000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + "ldr", "!0f, [!1X, #!2D]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldr3rXD), SIZE_VARIANTS(0xb9400000), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2StrhRRI12, 0xf8a00000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strh", "!0C, [!1C, #!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2StrbRRI12, 0xf8800000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strb", "!0C, [!1C, #!2d]", 4, kFixupNone), - ENCODING_MAP(kThumb2Pop, 0xe8bd0000, - kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 - | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop), - ENCODING_MAP(kThumb2Push, 0xe92d0000, - kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 - | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop), - ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00, - kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_USE0 | SETS_CCODES, - "cmp", "!0C, #!1m", 4, kFixupNone), - ENCODING_MAP(kThumb2CmnRI8M, 0xf1100f00, - kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_USE0 
| SETS_CCODES, - "cmn", "!0C, #!1m", 4, kFixupNone), - ENCODING_MAP(kThumb2AdcRRR, 0xeb500000, /* setflags encoding */ - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, - IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone), - ENCODING_MAP(kThumb2AndRRR, 0xea000000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "and", "!0C, !1C, !2C!3H", 4, kFixupNone), - ENCODING_MAP(kThumb2BicRRR, 0xea200000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "bic", "!0C, !1C, !2C!3H", 4, kFixupNone), - ENCODING_MAP(kThumb2CmnRR, 0xeb000000, - kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "cmn", "!0C, !1C, shift !2d", 4, kFixupNone), - ENCODING_MAP(kThumb2EorRRR, 0xea800000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "eor", "!0C, !1C, !2C!3H", 4, kFixupNone), - ENCODING_MAP(kThumb2MulRRR, 0xfb00f000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "mul", "!0C, !1C, !2C", 4, kFixupNone), - ENCODING_MAP(kThumb2SdivRRR, 0xfb90f0f0, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "sdiv", "!0C, !1C, !2C", 4, kFixupNone), - ENCODING_MAP(kThumb2UdivRRR, 0xfbb0f0f0, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + "ldr", "!0r, [!1X, #!2D]", kFixupNone), + ENCODING_MAP(FWIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800), + kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldr", "!0f, [!1X, !2x!3G]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldr4rXxG), SIZE_VARIANTS(0xb8606800), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldr", "!0r, [!1X, !2x!3G]", kFixupNone), + ENCODING_MAP(WIDE(kA64LdrPost3rXd), SIZE_VARIANTS(0xb8400400), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01 | REG_USE1 | IS_LOAD, + "ldr", "!0r, [!1X], #!2d", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000), + kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5, + kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD, + "ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone), + ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000), + kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5, + kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD, + "ldp", "!0r, !1r, [!2X], #!3D", kFixupNone), + ENCODING_MAP(FWIDE(kA64Ldur3fXd), CUSTOM_VARIANTS(0xbc400000, 0xfc400000), + kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldur", "!0f, [!1X, #!2d]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldur3rXd), SIZE_VARIANTS(0xb8400000), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldur", "!0r, [!1X, #!2d]", kFixupNone), + ENCODING_MAP(WIDE(kA64Ldxr2rX), SIZE_VARIANTS(0x885f7c00), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldxr", "!0r, [!1X]", kFixupNone), + ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000), + 
kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "udiv", "!0C, !1C, !2C", 4, kFixupNone), - ENCODING_MAP(kThumb2MnvRR, 0xea6f0000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, + "lsl", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(WIDE(kA64Lsr3rrd), CUSTOM_VARIANTS(0x53007c00, 0xd340fc00), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "mvn", "!0C, !1C, shift !2d", 4, kFixupNone), - ENCODING_MAP(kThumb2RsubRRI8M, 0xf1d00000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "rsbs", "!0C,!1C,#!2m", 4, kFixupNone), - ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsub */ - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "neg", "!0C,!1C", 4, kFixupNone), - ENCODING_MAP(kThumb2OrrRRR, 0xea400000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "orr", "!0C, !1C, !2C!3H", 4, kFixupNone), - ENCODING_MAP(kThumb2TstRR, 0xea100f00, - kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, - "tst", "!0C, !1C, shift !2d", 4, kFixupNone), - ENCODING_MAP(kThumb2LslRRR, 0xfa00f000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "lsl", "!0C, !1C, !2C", 4, kFixupNone), - ENCODING_MAP(kThumb2LsrRRR, 0xfa20f000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + "lsr", "!0r, !1r, #!2d", kFixupNone), + ENCODING_MAP(WIDE(kA64Lsr3rrr), SF_VARIANTS(0x1ac02400), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "lsr", "!0C, !1C, !2C", 4, kFixupNone), - ENCODING_MAP(kThumb2AsrRRR, 0xfa40f000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "asr", "!0C, !1C, !2C", 4, kFixupNone), - ENCODING_MAP(kThumb2RorRRR, 0xfa60f000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + "lsr", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(WIDE(kA64Movk3rdM), SF_VARIANTS(0x72800000), + kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE0, + "movk", "!0r, #!1d!2M", kFixupNone), + ENCODING_MAP(WIDE(kA64Movn3rdM), SF_VARIANTS(0x12800000), + kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0, + "movn", "!0r, #!1d!2M", kFixupNone), + ENCODING_MAP(WIDE(kA64Movz3rdM), SF_VARIANTS(0x52800000), + kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0, + "movz", "!0r, #!1d!2M", kFixupNone), + ENCODING_MAP(WIDE(kA64Mov2rr), SF_VARIANTS(0x2a0003e0), + kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov", "!0r, !1r", kFixupNone), + ENCODING_MAP(WIDE(kA64Mvn2rr), SF_VARIANTS(0x2a2003e0), + kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mvn", "!0r, !1r", kFixupNone), + ENCODING_MAP(WIDE(kA64Mul3rrr), SF_VARIANTS(0x1b007c00), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "ror", "!0C, !1C, !2C", 4, kFixupNone), - ENCODING_MAP(kThumb2LslRRI5, 0xea4f0000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 
3, 0, kFmtShift5, -1, -1, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "lsl", "!0C, !1C, #!2d", 4, kFixupNone), - ENCODING_MAP(kThumb2LsrRRI5, 0xea4f0010, - kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "lsr", "!0C, !1C, #!2d", 4, kFixupNone), - ENCODING_MAP(kThumb2AsrRRI5, 0xea4f0020, - kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "asr", "!0C, !1C, #!2d", 4, kFixupNone), - ENCODING_MAP(kThumb2RorRRI5, 0xea4f0030, - kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "ror", "!0C, !1C, #!2d", 4, kFixupNone), - ENCODING_MAP(kThumb2BicRRI8M, 0xf0200000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "bic", "!0C, !1C, #!2m", 4, kFixupNone), - ENCODING_MAP(kThumb2AndRRI8M, 0xf0000000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + "mul", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0), + kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "and", "!0C, !1C, #!2m", 4, kFixupNone), - ENCODING_MAP(kThumb2OrrRRI8M, 0xf0400000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + "neg", "!0r, !1r!2o", kFixupNone), + ENCODING_MAP(WIDE(kA64Orr3Rrl), SF_VARIANTS(0x32000000), + kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "orr", "!0C, !1C, #!2m", 4, kFixupNone), - ENCODING_MAP(kThumb2EorRRI8M, 0xf0800000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "eor", "!0C, !1C, #!2m", 4, kFixupNone), - ENCODING_MAP(kThumb2AddRRI8M, 0xf1100000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "adds", "!0C, !1C, #!2m", 4, kFixupNone), - ENCODING_MAP(kThumb2AdcRRI8M, 0xf1500000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, - "adcs", "!0C, !1C, #!2m", 4, kFixupNone), - ENCODING_MAP(kThumb2SubRRI8M, 0xf1b00000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "subs", "!0C, !1C, #!2m", 4, kFixupNone), - ENCODING_MAP(kThumb2SbcRRI8M, 0xf1700000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, - "sbcs", "!0C, !1C, #!2m", 4, kFixupNone), - ENCODING_MAP(kThumb2RevRR, 0xfa90f080, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice. - "rev", "!0C, !1C", 4, kFixupNone), - ENCODING_MAP(kThumb2RevshRR, 0xfa90f0b0, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice. 
- "revsh", "!0C, !1C", 4, kFixupNone), - ENCODING_MAP(kThumb2It, 0xbf00, - kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES, - "it:!1b", "!0c", 2, kFixupNone), - ENCODING_MAP(kThumb2Fmstat, 0xeef1fa10, + "orr", "!0R, !1r, #!2l", kFixupNone), + ENCODING_MAP(WIDE(kA64Orr4rrro), SF_VARIANTS(0x2a000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "orr", "!0r, !1r, !2r!3o", kFixupNone), + ENCODING_MAP(kA64Ret, NO_VARIANTS(0xd65f03c0), kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES, - "fmstat", "", 4, kFixupNone), - ENCODING_MAP(kThumb2Vcmpd, 0xeeb40b40, - kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, - "vcmp.f64", "!0S, !1S", 4, kFixupNone), - ENCODING_MAP(kThumb2Vcmps, 0xeeb40a40, - kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, - "vcmp.f32", "!0s, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrPcRel12, 0xf8df0000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, - "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad), - ENCODING_MAP(kThumb2BCond, 0xf0008000, - kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP, - "b!1c", "!0t", 4, kFixupCondBranch), - ENCODING_MAP(kThumb2Fmrs, 0xee100a10, - kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, + "ret", "", kFixupNone), + ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00), + kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "fmrs", "!0C, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2Fmsr, 0xee000a10, - kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, + "rev", "!0r, !1r", kFixupNone), + ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0xfa90f0b0), + kFmtRegR, 11, 8, kFmtRegR, 19, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "fmsr", "!0s, !1C", 4, kFixupNone), - ENCODING_MAP(kThumb2Fmrrd, 0xec500b10, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2, - "fmrrd", "!0C, !1C, !2S", 4, kFixupNone), - ENCODING_MAP(kThumb2Fmdrr, 0xec400b10, - kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, + "rev16", "!0r, !1r", kFixupNone), + ENCODING_MAP(WIDE(kA64Ror3rrr), SF_VARIANTS(0x1ac02c00), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "fmdrr", "!0S, !1C, !2C", 4, kFixupNone), - ENCODING_MAP(kThumb2Vabsd, 0xeeb00bc0, - kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vabs.f64", "!0S, !1S", 4, kFixupNone), - ENCODING_MAP(kThumb2Vabss, 0xeeb00ac0, - kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vabs.f32", "!0s, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2Vnegd, 0xeeb10b40, - kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + "ror", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(WIDE(kA64Sbc3rrr), SF_VARIANTS(0x5a000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sbc", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(WIDE(kA64Sbfm4rrdd), 
SF_N_VARIANTS(0x13000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16, + kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1, + "sbfm", "!0r, !1r, #!2d, #!3d", kFixupNone), + ENCODING_MAP(FWIDE(kA64Scvtf2fw), FLOAT_VARIANTS(0x1e220000), + kFmtRegF, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vneg.f64", "!0S, !1S", 4, kFixupNone), - ENCODING_MAP(kThumb2Vnegs, 0xeeb10a40, - kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + "scvtf", "!0f, !1w", kFixupNone), + ENCODING_MAP(FWIDE(kA64Scvtf2fx), FLOAT_VARIANTS(0x9e220000), + kFmtRegF, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vneg.f32", "!0s, !1s", 4, kFixupNone), - ENCODING_MAP(kThumb2Vmovs_IMM8, 0xeeb00a00, - kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "vmov.f32", "!0s, #0x!1h", 4, kFixupNone), - ENCODING_MAP(kThumb2Vmovd_IMM8, 0xeeb00b00, - kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "vmov.f64", "!0S, #0x!1h", 4, kFixupNone), - ENCODING_MAP(kThumb2Mla, 0xfb000000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123, - "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone), - ENCODING_MAP(kThumb2Umull, 0xfba00000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, - kFmtBitBlt, 3, 0, - IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, - "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone), - ENCODING_MAP(kThumb2Ldrex, 0xe8500f00, - kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone), - ENCODING_MAP(kThumb2Ldrexd, 0xe8d0007f, - kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOAD, - "ldrexd", "!0C, !1C, [!2C]", 4, kFixupNone), - ENCODING_MAP(kThumb2Strex, 0xe8400000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, - kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE, - "strex", "!0C, !1C, [!2C, #!2E]", 4, kFixupNone), - ENCODING_MAP(kThumb2Strexd, 0xe8c00070, - kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, - kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STORE, - "strexd", "!0C, !1C, !2C, [!3C]", 4, kFixupNone), - ENCODING_MAP(kThumb2Clrex, 0xf3bf8f2f, - kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, NO_OPERAND, - "clrex", "", 4, kFixupNone), - ENCODING_MAP(kThumb2Bfi, 0xf3600000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1, - kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, - "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone), - ENCODING_MAP(kThumb2Bfc, 0xf36f0000, - kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0, - kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0, - "bfc", "!0C,#!1d,#!2d", 4, kFixupNone), - ENCODING_MAP(kThumb2Dmb, 0xf3bf8f50, - kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP, - "dmb", "#!0B", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrPcReln12, 0xf85f0000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD, - "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone), - ENCODING_MAP(kThumb2Stm, 0xe9000000, - kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE, - "stm", "!0C, <!1R>", 4, 
kFixupNone), - ENCODING_MAP(kThumbUndefined, 0xde00, - kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, NO_OPERAND, - "undefined", "", 2, kFixupNone), - // NOTE: vpop, vpush hard-encoded for s16+ reg list - ENCODING_MAP(kThumb2VPopCS, 0xecbd8a00, - kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0 - | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone), - ENCODING_MAP(kThumb2VPushCS, 0xed2d8a00, - kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0 - | IS_STORE, "vpush", "<!0P>", 4, kFixupNone), - ENCODING_MAP(kThumb2Vldms, 0xec900a00, - kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2 - | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone), - ENCODING_MAP(kThumb2Vstms, 0xec800a00, - kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2 - | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone), - ENCODING_MAP(kThumb2BUncond, 0xf0009000, - kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, - "b", "!0t", 4, kFixupT2Branch), - ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000, - kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0, - "movt", "!0C, #!1M", 4, kFixupNone), - ENCODING_MAP(kThumb2AddPCR, 0x4487, - kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP, - "add", "rPC, !0C", 2, kFixupLabel), - ENCODING_MAP(kThumb2Adr, 0xf20f0000, - kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - /* Note: doesn't affect flags */ - IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP, - "adr", "!0C,#!1d", 4, kFixupAdr), - ENCODING_MAP(kThumb2MovImm16LST, 0xf2400000, - kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP, - "mov", "!0C, #!1M", 4, kFixupMovImmLST), - ENCODING_MAP(kThumb2MovImm16HST, 0xf2c00000, - kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP, - "movt", "!0C, #!1M", 4, kFixupMovImmHST), - ENCODING_MAP(kThumb2LdmiaWB, 0xe8b00000, - kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, - "ldmia", "!0C!!, <!1R>", 4, kFixupNone), - ENCODING_MAP(kThumb2OrrRRRs, 0xea500000, - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone), - ENCODING_MAP(kThumb2Push1, 0xf84d0d04, - kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0 - | IS_STORE, "push1", "!0C", 4, kFixupNone), - ENCODING_MAP(kThumb2Pop1, 0xf85d0b04, - kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, - IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0 - | IS_LOAD, "pop1", "!0C", 4, kFixupNone), - ENCODING_MAP(kThumb2RsubRRR, 0xebd00000, /* setflags encoding */ - kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtShift, -1, -1, - IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone), - 
ENCODING_MAP(kThumb2Smull, 0xfb800000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, - kFmtBitBlt, 3, 0, - IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, - "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone), - ENCODING_MAP(kThumb2LdrdPcRel8, 0xe9df0000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0, - kFmtUnused, -1, -1, - IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, - "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad), - ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, - kFmtBitBlt, 7, 0, - IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD, - "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone), - ENCODING_MAP(kThumb2StrdI8, 0xe9c00000, - kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, - kFmtBitBlt, 7, 0, - IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE, - "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone), + "scvtf", "!0f, !1x", kFixupNone), + ENCODING_MAP(WIDE(kA64Sdiv3rrr), SF_VARIANTS(0x1ac00c00), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sdiv", "!0r, !1r, !2r", kFixupNone), + ENCODING_MAP(WIDE(kA64Smaddl4xwwx), NO_VARIANTS(0x9b200000), + kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16, + kFmtRegX, -1, -1, IS_QUAD_OP | REG_DEF0_USE123, + "smaddl", "!0x, !1w, !2w, !3x", kFixupNone), + ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000), + kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5, + kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE, + "stp", "!0r, !1r, [!2X, #!3D]", kFixupNone), + ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000), + kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5, + kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE, + "stp", "!0r, !1r, [!2X], #!3D", kFixupNone), + ENCODING_MAP(WIDE(kA64StpPre4rrXD), CUSTOM_VARIANTS(0x29800000, 0xa9800000), + kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5, + kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE, + "stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone), + ENCODING_MAP(FWIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000), + kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "str", "!0f, [!1X, #!2D]", kFixupNone), + ENCODING_MAP(FWIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800), + kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE, + "str", "!0f, [!1X, !2x!3G]", kFixupNone), + ENCODING_MAP(WIDE(kA64Str3rXD), SIZE_VARIANTS(0xb9000000), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "str", "!0r, [!1X, #!2D]", kFixupNone), + ENCODING_MAP(WIDE(kA64Str4rXxG), SIZE_VARIANTS(0xb8206800), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE, + "str", "!0r, [!1X, !2x!3G]", kFixupNone), + ENCODING_MAP(kA64Strb3wXd, NO_VARIANTS(0x39000000), + kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "strb", "!0w, [!1X, #!2d]", kFixupNone), + ENCODING_MAP(kA64Strb3wXx, NO_VARIANTS(0x38206800), + kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, + "strb", "!0w, [!1X, !2x]", kFixupNone), + ENCODING_MAP(kA64Strh3wXF, NO_VARIANTS(0x79000000), + kFmtRegW, 4, 0, 
kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "strh", "!0w, [!1X, #!2F]", kFixupNone), + ENCODING_MAP(kA64Strh4wXxd, NO_VARIANTS(0x78206800), + kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16, + kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE, + "strh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone), + ENCODING_MAP(WIDE(kA64StrPost3rXd), SIZE_VARIANTS(0xb8000400), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | REG_DEF1 | IS_STORE, + "str", "!0r, [!1X], #!2d", kFixupNone), + ENCODING_MAP(FWIDE(kA64Stur3fXd), CUSTOM_VARIANTS(0xbc000000, 0xfc000000), + kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "stur", "!0f, [!1X, #!2d]", kFixupNone), + ENCODING_MAP(WIDE(kA64Stur3rXd), SIZE_VARIANTS(0xb8000000), + kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "stur", "!0r, [!1X, #!2d]", kFixupNone), + ENCODING_MAP(WIDE(kA64Stxr3wrX), SIZE_VARIANTS(0x88007c00), + kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE, + "stxr", "!0w, !1r, [!2X]", kFixupNone), + ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000), + kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, + kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1, + "sub", "!0R, !1R, #!2d!3T", kFixupNone), + ENCODING_MAP(WIDE(kA64Sub4rrro), SF_VARIANTS(0x4b000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "sub", "!0r, !1r, !2r!3o", kFixupNone), + ENCODING_MAP(WIDE(kA64Subs3rRd), SF_VARIANTS(0x71000000), + kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "subs", "!0r, !1R, #!2d", kFixupNone), + ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a000000), + kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1, + kFmtUnused, -1, -1, IS_QUAD_OP | REG_USE01 | SETS_CCODES, + "tst", "!0r, !1r!2o", kFixupNone), + ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000), + kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16, + kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1, + "ubfm", "!0r, !1r, !2d, !3d", kFixupNone), }; // new_lir replaces orig_lir in the pcrel_fixup list. @@ -1059,153 +566,159 @@ void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) } } -/* - * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is - * not ready. Since r5FP is not updated often, it is less likely to - * generate unnecessary stall cycles. - * TUNING: No longer true - find new NOP pattern. - */ -#define PADDING_MOV_R5_R5 0x1C2D +/* Nop, used for aligning code. Nop is an alias for hint #0. */ +#define PADDING_NOP (UINT32_C(0xd503201f)) uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { - for (; lir != NULL; lir = NEXT_LIR(lir)) { - if (!lir->flags.is_nop) { - int opcode = lir->opcode; - if (IsPseudoLirOp(opcode)) { - if (UNLIKELY(opcode == kPseudoPseudoAlign4)) { - // Note: size for this opcode will be either 0 or 2 depending on final alignment. 
- if (lir->offset & 0x2) { - write_pos[0] = (PADDING_MOV_R5_R5 & 0xff); - write_pos[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff); - write_pos += 2; - } - } - } else if (LIKELY(!lir->flags.is_nop)) { - const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; - uint32_t bits = encoder->skeleton; - for (int i = 0; i < 4; i++) { - uint32_t operand; - uint32_t value; - operand = lir->operands[i]; - ArmEncodingKind kind = encoder->field_loc[i].kind; - if (LIKELY(kind == kFmtBitBlt)) { - value = (operand << encoder->field_loc[i].start) & - ((1 << (encoder->field_loc[i].end + 1)) - 1); - bits |= value; - } else { - switch (encoder->field_loc[i].kind) { - case kFmtSkip: - break; // Nothing to do, but continue to next. - case kFmtUnused: - i = 4; // Done, break out of the enclosing loop. - break; - case kFmtFPImm: - value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end; - value |= (operand & 0x0F) << encoder->field_loc[i].start; - bits |= value; - break; - case kFmtBrOffset: - value = ((operand & 0x80000) >> 19) << 26; - value |= ((operand & 0x40000) >> 18) << 11; - value |= ((operand & 0x20000) >> 17) << 13; - value |= ((operand & 0x1f800) >> 11) << 16; - value |= (operand & 0x007ff); - bits |= value; - break; - case kFmtShift5: - value = ((operand & 0x1c) >> 2) << 12; - value |= (operand & 0x03) << 6; - bits |= value; - break; - case kFmtShift: - value = ((operand & 0x70) >> 4) << 12; - value |= (operand & 0x0f) << 4; - bits |= value; - break; - case kFmtBWidth: - value = operand - 1; - bits |= value; - break; - case kFmtLsb: - value = ((operand & 0x1c) >> 2) << 12; - value |= (operand & 0x03) << 6; - bits |= value; - break; - case kFmtImm6: - value = ((operand & 0x20) >> 5) << 9; - value |= (operand & 0x1f) << 3; - bits |= value; - break; - case kFmtDfp: { - DCHECK(RegStorage::IsDouble(operand)) << ", Operand = 0x" << std::hex << operand; - uint32_t reg_num = RegStorage::RegNum(operand); - /* Snag the 1-bit slice and position it */ - value = ((reg_num & 0x10) >> 4) << encoder->field_loc[i].end; - /* Extract and position the 4-bit slice */ - value |= (reg_num & 0x0f) << encoder->field_loc[i].start; - bits |= value; - break; - } - case kFmtSfp: { - DCHECK(RegStorage::IsSingle(operand)) << ", Operand = 0x" << std::hex << operand; - uint32_t reg_num = RegStorage::RegNum(operand); - /* Snag the 1-bit slice and position it */ - value = (reg_num & 0x1) << encoder->field_loc[i].end; - /* Extract and position the 4-bit slice */ - value |= ((reg_num & 0x1e) >> 1) << encoder->field_loc[i].start; - bits |= value; - break; + for (; lir != nullptr; lir = NEXT_LIR(lir)) { + bool opcode_is_wide = IS_WIDE(lir->opcode); + ArmOpcode opcode = UNWIDE(lir->opcode); + + if (UNLIKELY(IsPseudoLirOp(opcode))) { + continue; + } + + if (LIKELY(!lir->flags.is_nop)) { + const ArmEncodingMap *encoder = &EncodingMap[opcode]; + + // Select the right variant of the skeleton. + uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton; + DCHECK(!opcode_is_wide || IS_WIDE(encoder->opcode)); + + for (int i = 0; i < 4; i++) { + ArmEncodingKind kind = encoder->field_loc[i].kind; + uint32_t operand = lir->operands[i]; + uint32_t value; + + if (LIKELY(static_cast<unsigned>(kind) <= kFmtBitBlt)) { + // Note: this will handle kFmtReg* and kFmtBitBlt. + + if (static_cast<unsigned>(kind) < kFmtBitBlt) { + bool is_zero = A64_REG_IS_ZR(operand); + + if (kIsDebugBuild) { + // Register usage checks: First establish register usage requirements based on the + // format in `kind'. 
+ bool want_float = false; + bool want_64_bit = false; + bool want_size_match = false; + bool want_zero = false; + switch (kind) { + case kFmtRegX: + want_64_bit = true; + // Intentional fall-through. + case kFmtRegW: + want_size_match = true; + // Intentional fall-through. + case kFmtRegR: + want_zero = true; + break; + case kFmtRegXOrSp: + want_64_bit = true; + // Intentional fall-through. + case kFmtRegWOrSp: + want_size_match = true; + break; + case kFmtRegROrSp: + break; + case kFmtRegD: + want_64_bit = true; + // Intentional fall-through. + case kFmtRegS: + want_size_match = true; + // Intentional fall-through. + case kFmtRegF: + want_float = true; + break; + default: + LOG(FATAL) << "Bad fmt for arg n. " << i << " of " << encoder->name + << " (" << kind << ")"; + break; } - case kFmtImm12: - case kFmtModImm: - value = ((operand & 0x800) >> 11) << 26; - value |= ((operand & 0x700) >> 8) << 12; - value |= operand & 0x0ff; - bits |= value; - break; - case kFmtImm16: - value = ((operand & 0x0800) >> 11) << 26; - value |= ((operand & 0xf000) >> 12) << 16; - value |= ((operand & 0x0700) >> 8) << 12; - value |= operand & 0x0ff; - bits |= value; - break; - case kFmtOff24: { - uint32_t signbit = (operand >> 31) & 0x1; - uint32_t i1 = (operand >> 22) & 0x1; - uint32_t i2 = (operand >> 21) & 0x1; - uint32_t imm10 = (operand >> 11) & 0x03ff; - uint32_t imm11 = operand & 0x07ff; - uint32_t j1 = (i1 ^ signbit) ? 0 : 1; - uint32_t j2 = (i2 ^ signbit) ? 0 : 1; - value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | - imm11; - bits |= value; + + // Now check that the requirements are satisfied. + RegStorage reg(operand); + const char *expected = nullptr; + if (want_float) { + if (!reg.IsFloat()) { + expected = "float register"; + } else if (want_size_match && (reg.IsDouble() != want_64_bit)) { + expected = (want_64_bit) ? "double register" : "single register"; + } + } else { + if (reg.IsFloat()) { + expected = "core register"; + } else if (want_size_match && (reg.Is64Bit() != want_64_bit)) { + expected = (want_64_bit) ? "x-register" : "w-register"; + } else if (reg.GetRegNum() == 31 && is_zero == want_zero) { + expected = (want_zero) ? "zero-register" : "sp-register"; } - break; - default: - LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; + } + + // TODO(Arm64): if !want_size_match, then we still should compare the size of the + // register with the size required by the instruction width (kA64Wide). + + // Fail, if `expected' contains an unsatisfied requirement. + if (expected != nullptr) { + // TODO(Arm64): make this FATAL. + LOG(WARNING) << "Bad argument n. " << i << " of " << encoder->name + << ". Expected " << expected << ", got 0x" << std::hex << operand; + } + } + + // TODO(Arm64): this may or may not be necessary, depending on how wzr, xzr are + // defined. + if (is_zero) { + operand = 31; } } - } - if (encoder->size == 4) { - write_pos[0] = ((bits >> 16) & 0xff); - write_pos[1] = ((bits >> 24) & 0xff); - write_pos[2] = (bits & 0xff); - write_pos[3] = ((bits >> 8) & 0xff); - write_pos += 4; + + value = (operand << encoder->field_loc[i].start) & + ((1 << (encoder->field_loc[i].end + 1)) - 1); + bits |= value; } else { - DCHECK_EQ(encoder->size, 2); - write_pos[0] = (bits & 0xff); - write_pos[1] = ((bits >> 8) & 0xff); - write_pos += 2; + switch (kind) { + case kFmtSkip: + break; // Nothing to do, but continue to next. + case kFmtUnused: + i = 4; // Done, break out of the enclosing loop. + break; + case kFmtShift: + // Intentional fallthrough. 
+ case kFmtExtend: + DCHECK_EQ((operand & (1 << 6)) == 0, kind == kFmtShift); + value = (operand & 0x3f) << 10; + value |= ((operand & 0x1c0) >> 6) << 21; + bits |= value; + break; + case kFmtImm21: + value = (operand & 0x3) << 29; + value |= ((operand & 0x1ffffc) >> 2) << 5; + bits |= value; + break; + default: + LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name + << " (" << kind << ")"; + } } } + + DCHECK_EQ(encoder->size, 4); + write_pos[0] = (bits & 0xff); + write_pos[1] = ((bits >> 8) & 0xff); + write_pos[2] = ((bits >> 16) & 0xff); + write_pos[3] = ((bits >> 24) & 0xff); + write_pos += 4; } } + return write_pos; } +// Align data offset on 8 byte boundary: it will only contain double-word items, as word immediates +// are better set directly from the code (they will require no more than 2 instructions). +#define ALIGNED_DATA_OFFSET(offset) (((offset) + 0x7) & ~0x7) + // Assemble the LIR into binary instruction format. void Arm64Mir2Lir::AssembleLIR() { LIR* lir; @@ -1213,20 +726,25 @@ void Arm64Mir2Lir::AssembleLIR() { cu_->NewTimingSplit("Assemble"); int assembler_retries = 0; CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0); - data_offset_ = (starting_offset + 0x3) & ~0x3; + data_offset_ = ALIGNED_DATA_OFFSET(starting_offset); int32_t offset_adjustment; AssignDataOffsets(); /* - * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for - * non-visited nodes). Start at zero here, and bit will be flipped to 1 on entry to the loop. + * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 + * for non-visited nodes). Start at zero here, and bit will be flipped to 1 on entry to the loop. */ int generation = 0; while (true) { + // TODO(Arm64): check whether passes and offset adjustments are really necessary. + // Currently they aren't, as - in the fixups below - LIR are never inserted. + // Things can be different if jump ranges above 1 MB need to be supported. + // If they are not, then we can get rid of the assembler retry logic. + offset_adjustment = 0; AssemblerStatus res = kSuccess; // Assume success generation ^= 1; - // Note: nodes requring possible fixup linked in ascending order. + // Note: nodes requiring possible fixup linked in ascending order. lir = first_fixup_; prev_lir = NULL; while (lir != NULL) { @@ -1243,341 +761,54 @@ void Arm64Mir2Lir::AssembleLIR() { switch (static_cast<FixupKind>(lir->flags.fixup)) { case kFixupLabel: case kFixupNone: - break; case kFixupVLoad: - if (lir->operands[1] != rs_r15pc.GetReg()) { - break; - } - // NOTE: intentional fallthrough. - case kFixupLoad: { - /* - * PC-relative loads are mostly used to load immediates - * that are too large to materialize directly in one shot. - * However, if the load displacement exceeds the limit, - * we revert to a multiple-instruction materialization sequence. - */ - LIR *lir_target = lir->target; - CodeOffset pc = (lir->offset + 4) & ~3; - CodeOffset target = lir_target->offset + - ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); - int32_t delta = target - pc; - if (res != kSuccess) { - /* - * In this case, we're just estimating and will do it again for real. Ensure offset - * is legal. 
- */ - delta &= ~0x3; - } - DCHECK_EQ((delta & 0x3), 0); - // First, a sanity check for cases we shouldn't see now - if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) || - ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) { - // Shouldn't happen in current codegen. - LOG(FATAL) << "Unexpected pc-rel offset " << delta; - } - // Now, check for the difficult cases - if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || - ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) || - ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) || - ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) { - /* - * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we - * sometimes have to use it to fix up out-of-range accesses. This is where that - * happens. - */ - int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || - (lir->opcode == kThumb2LdrPcRel12)) ? lir->operands[0] : - rs_rARM_LR.GetReg(); - - // Add new Adr to generate the address. - LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr, - base_reg, 0, 0, 0, 0, lir->target); - new_adr->offset = lir->offset; - new_adr->flags.fixup = kFixupAdr; - new_adr->flags.size = EncodingMap[kThumb2Adr].size; - InsertLIRBefore(lir, new_adr); - lir->offset += new_adr->flags.size; - offset_adjustment += new_adr->flags.size; - - // lir no longer pcrel, unlink and link in new_adr. - ReplaceFixup(prev_lir, lir, new_adr); - - // Convert to normal load. - offset_adjustment -= lir->flags.size; - if (lir->opcode == kThumb2LdrPcRel12) { - lir->opcode = kThumb2LdrRRI12; - } else if (lir->opcode == kThumb2LdrdPcRel8) { - lir->opcode = kThumb2LdrdI8; - } - lir->flags.size = EncodingMap[lir->opcode].size; - offset_adjustment += lir->flags.size; - // Change the load to be relative to the new Adr base. - if (lir->opcode == kThumb2LdrdI8) { - lir->operands[3] = 0; - lir->operands[2] = base_reg; - } else { - lir->operands[2] = 0; - lir->operands[1] = base_reg; - } - prev_lir = new_adr; // Continue scan with new_adr; - lir = new_adr->u.a.pcrel_next; - res = kRetryAll; - continue; - } else { - if ((lir->opcode == kThumb2Vldrs) || - (lir->opcode == kThumb2Vldrd) || - (lir->opcode == kThumb2LdrdPcRel8)) { - lir->operands[2] = delta >> 2; - } else { - lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? delta : - delta >> 2; - } - } break; - } - case kFixupCBxZ: { + case kFixupT1Branch: { LIR *target_lir = lir->target; - CodeOffset pc = lir->offset + 4; + DCHECK(target_lir); + CodeOffset pc = lir->offset; CodeOffset target = target_lir->offset + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); int32_t delta = target - pc; - if (delta > 126 || delta < 0) { - /* - * Convert to cmp rx,#0 / b[eq/ne] tgt pair - * Make new branch instruction and insert after - */ - LIR* new_inst = - RawLIR(lir->dalvik_offset, kThumbBCond, 0, - (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe, - 0, 0, 0, lir->target); - InsertLIRAfter(lir, new_inst); - - /* Convert the cb[n]z to a cmp rx, #0 ] */ - // Subtract the old size. - offset_adjustment -= lir->flags.size; - lir->opcode = kThumbCmpRI8; - /* operand[0] is src1 in both cb[n]z & CmpRI8 */ - lir->operands[1] = 0; - lir->target = 0; - lir->flags.size = EncodingMap[lir->opcode].size; - // Add back the new size. - offset_adjustment += lir->flags.size; - // Set up the new following inst. 
- new_inst->offset = lir->offset + lir->flags.size; - new_inst->flags.fixup = kFixupCondBranch; - new_inst->flags.size = EncodingMap[new_inst->opcode].size; - offset_adjustment += new_inst->flags.size; - - // lir no longer pcrel, unlink and link in new_inst. - ReplaceFixup(prev_lir, lir, new_inst); - prev_lir = new_inst; // Continue with the new instruction. - lir = new_inst->u.a.pcrel_next; - res = kRetryAll; - continue; - } else { - lir->operands[1] = delta >> 1; - } - break; - } - case kFixupPushPop: { - if (__builtin_popcount(lir->operands[0]) == 1) { - /* - * The standard push/pop multiple instruction - * requires at least two registers in the list. - * If we've got just one, switch to the single-reg - * encoding. - */ - lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 : - kThumb2Pop1; - int reg = 0; - while (lir->operands[0]) { - if (lir->operands[0] & 0x1) { - break; - } else { - reg++; - lir->operands[0] >>= 1; - } - } - lir->operands[0] = reg; - // This won't change again, don't bother unlinking, just reset fixup kind - lir->flags.fixup = kFixupNone; + if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) { + LOG(FATAL) << "Invalid jump range in kFixupT1Branch"; } + lir->operands[0] = delta >> 2; break; } + case kFixupLoad: + case kFixupCBxZ: case kFixupCondBranch: { LIR *target_lir = lir->target; - int32_t delta = 0; DCHECK(target_lir); - CodeOffset pc = lir->offset + 4; - CodeOffset target = target_lir->offset + - ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); - delta = target - pc; - if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) { - offset_adjustment -= lir->flags.size; - lir->opcode = kThumb2BCond; - lir->flags.size = EncodingMap[lir->opcode].size; - // Fixup kind remains the same. - offset_adjustment += lir->flags.size; - res = kRetryAll; - } - lir->operands[0] = delta >> 1; - break; - } - case kFixupT2Branch: { - LIR *target_lir = lir->target; - CodeOffset pc = lir->offset + 4; - CodeOffset target = target_lir->offset + - ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); - int32_t delta = target - pc; - lir->operands[0] = delta >> 1; - if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) { - // Useless branch - offset_adjustment -= lir->flags.size; - lir->flags.is_nop = true; - // Don't unlink - just set to do-nothing. - lir->flags.fixup = kFixupNone; - res = kRetryAll; - } - break; - } - case kFixupT1Branch: { - LIR *target_lir = lir->target; - CodeOffset pc = lir->offset + 4; + CodeOffset pc = lir->offset; CodeOffset target = target_lir->offset + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); int32_t delta = target - pc; - if (delta > 2046 || delta < -2048) { - // Convert to Thumb2BCond w/ kArmCondAl - offset_adjustment -= lir->flags.size; - lir->opcode = kThumb2BUncond; - lir->operands[0] = 0; - lir->flags.size = EncodingMap[lir->opcode].size; - lir->flags.fixup = kFixupT2Branch; - offset_adjustment += lir->flags.size; - res = kRetryAll; - } else { - lir->operands[0] = delta >> 1; - if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) { - // Useless branch - offset_adjustment -= lir->flags.size; - lir->flags.is_nop = true; - // Don't unlink - just set to do-nothing. 
- lir->flags.fixup = kFixupNone; - res = kRetryAll; - } + if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) { + LOG(FATAL) << "Invalid jump range in kFixupLoad"; } - break; - } - case kFixupBlx1: { - DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2); - /* cur_pc is Thumb */ - CodeOffset cur_pc = (lir->offset + 4) & ~3; - CodeOffset target = lir->operands[1]; - - /* Match bit[1] in target with base */ - if (cur_pc & 0x2) { - target |= 0x2; - } - int32_t delta = target - cur_pc; - DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); - - lir->operands[0] = (delta >> 12) & 0x7ff; - NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; - break; - } - case kFixupBl1: { - DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2); - /* Both cur_pc and target are Thumb */ - CodeOffset cur_pc = lir->offset + 4; - CodeOffset target = lir->operands[1]; - - int32_t delta = target - cur_pc; - DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); - - lir->operands[0] = (delta >> 12) & 0x7ff; - NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + lir->operands[1] = delta >> 2; break; } case kFixupAdr: { - EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2])); - LIR* target = lir->target; - int32_t target_disp = (tab_rec != NULL) ? tab_rec->offset + offset_adjustment - : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 : - offset_adjustment); - int32_t disp = target_disp - ((lir->offset + 4) & ~3); - if (disp < 4096) { - lir->operands[1] = disp; + LIR* target_lir = lir->target; + int32_t delta; + if (target_lir) { + CodeOffset target_offs = ((target_lir->flags.generation == lir->flags.generation) ? + 0 : offset_adjustment) + target_lir->offset; + delta = target_offs - lir->offset; + } else if (lir->operands[2] >= 0) { + EmbeddedData* tab = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2])); + delta = tab->offset + offset_adjustment - lir->offset; } else { - // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0] - // TUNING: if this case fires often, it can be improved. Not expected to be common. - LIR *new_mov16L = - RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, lir->operands[0], 0, - WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); - new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size; - new_mov16L->flags.fixup = kFixupMovImmLST; - new_mov16L->offset = lir->offset; - // Link the new instruction, retaining lir. - InsertLIRBefore(lir, new_mov16L); - lir->offset += new_mov16L->flags.size; - offset_adjustment += new_mov16L->flags.size; - InsertFixupBefore(prev_lir, lir, new_mov16L); - prev_lir = new_mov16L; // Now we've got a new prev. - LIR *new_mov16H = - RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, lir->operands[0], 0, - WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); - new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size; - new_mov16H->flags.fixup = kFixupMovImmHST; - new_mov16H->offset = lir->offset; - // Link the new instruction, retaining lir. - InsertLIRBefore(lir, new_mov16H); - lir->offset += new_mov16H->flags.size; - offset_adjustment += new_mov16H->flags.size; - InsertFixupBefore(prev_lir, lir, new_mov16H); - prev_lir = new_mov16H; // Now we've got a new prev. 
- - offset_adjustment -= lir->flags.size; - if (RegStorage::RegNum(lir->operands[0]) < 8) { - lir->opcode = kThumbAddRRLH; - } else { - lir->opcode = kThumbAddRRHH; - } - lir->operands[1] = rs_rARM_PC.GetReg(); - lir->flags.size = EncodingMap[lir->opcode].size; - offset_adjustment += lir->flags.size; - // Must stay in fixup list and have offset updated; will be used by LST/HSP pair. - lir->flags.fixup = kFixupNone; - res = kRetryAll; + // No fixup: this usage allows to retrieve the current PC. + delta = lir->operands[1]; } - break; - } - case kFixupMovImmLST: { - // operands[1] should hold disp, [2] has add, [3] has tab_rec - LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])); - EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); - // If tab_rec is null, this is a literal load. Use target - LIR* target = lir->target; - int32_t target_disp = tab_rec ? tab_rec->offset : target->offset; - lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff; - break; - } - case kFixupMovImmHST: { - // operands[1] should hold disp, [2] has add, [3] has tab_rec - LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])); - EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); - // If tab_rec is null, this is a literal load. Use target - LIR* target = lir->target; - int32_t target_disp = tab_rec ? tab_rec->offset : target->offset; - lir->operands[1] = - ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff; - break; - } - case kFixupAlign4: { - int32_t required_size = lir->offset & 0x2; - if (lir->flags.size != required_size) { - offset_adjustment += required_size - lir->flags.size; - lir->flags.size = required_size; - res = kRetryAll; + if (!IS_SIGNED_IMM21(delta)) { + LOG(FATAL) << "Jump range above 1MB in kFixupAdr"; } + lir->operands[1] = delta; break; } default: @@ -1596,7 +827,7 @@ void Arm64Mir2Lir::AssembleLIR() { LOG(FATAL) << "Assembler error - too many retries"; } starting_offset += offset_adjustment; - data_offset_ = (starting_offset + 0x3) & ~0x3; + data_offset_ = ALIGNED_DATA_OFFSET(starting_offset); AssignDataOffsets(); } } @@ -1609,7 +840,7 @@ void Arm64Mir2Lir::AssembleLIR() { write_pos = EncodeLIRs(write_pos, first_lir_insn_); DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset); - DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3); + DCHECK_EQ(data_offset_, ALIGNED_DATA_OFFSET(code_buffer_.size())); // Install literals InstallLiteralPools(); @@ -1629,8 +860,9 @@ void Arm64Mir2Lir::AssembleLIR() { } int Arm64Mir2Lir::GetInsnSize(LIR* lir) { - DCHECK(!IsPseudoLirOp(lir->opcode)); - return EncodingMap[lir->opcode].size; + ArmOpcode opcode = UNWIDE(lir->opcode); + DCHECK(!IsPseudoLirOp(opcode)); + return EncodingMap[opcode].size; } // Encode instruction bit pattern and assign offsets. 
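[Editorial aside, not part of the patch] The new fixup cases above no longer rewrite out-of-range branches into longer sequences the way the removed Thumb2 code did. They compute `delta = target - pc` directly from the instruction's own offset (A64 PC-relative forms are relative to the instruction itself, so the old `+ 4` pipeline adjustment disappears), check that the delta fits the signed 19-bit (B.cond / CBZ / LDR literal) or 21-bit (ADR) immediate, and otherwise hit LOG(FATAL). The sketch below only restates that range check under those assumptions; IsSignedImm, FixupImm19 and FixupImm21 are illustrative names, not identifiers from the source, and the real behaviour of the IS_SIGNED_IMM19/IS_SIGNED_IMM21 macros should be taken from arm64_lir.h.

    // Standalone illustration of the range checks used by kFixupT1Branch,
    // kFixupLoad/kFixupCBxZ/kFixupCondBranch and kFixupAdr above.
    #include <cassert>
    #include <cstdint>

    // True if value fits in a two's-complement field of the given bit width.
    static bool IsSignedImm(int bits, int64_t value) {
      return value >= -(INT64_C(1) << (bits - 1)) && value < (INT64_C(1) << (bits - 1));
    }

    // B.cond / CBZ / LDR (literal): 19-bit immediate scaled by 4 => +/-1 MiB.
    static int32_t FixupImm19(uint32_t target_offset, uint32_t insn_offset) {
      int32_t delta = static_cast<int32_t>(target_offset - insn_offset);
      assert((delta & 0x3) == 0 && IsSignedImm(19, delta >> 2));
      return delta >> 2;  // Value stored back into the LIR operand.
    }

    // ADR: 21-bit byte offset, also +/-1 MiB, no scaling.
    static int32_t FixupImm21(uint32_t target_offset, uint32_t insn_offset) {
      int32_t delta = static_cast<int32_t>(target_offset - insn_offset);
      assert(IsSignedImm(21, delta));
      return delta;
    }

This is consistent with the TODO in the patch's retry loop: since the A64 fixups never insert new instructions, the assembler retry and offset-adjustment machinery is currently only a safety net for jump ranges beyond 1 MiB.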
@@ -1639,15 +871,14 @@ uint32_t Arm64Mir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t off LIR* last_fixup = NULL; for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) { + ArmOpcode opcode = UNWIDE(lir->opcode); if (!lir->flags.is_nop) { if (lir->flags.fixup != kFixupNone) { - if (!IsPseudoLirOp(lir->opcode)) { - lir->flags.size = EncodingMap[lir->opcode].size; - lir->flags.fixup = EncodingMap[lir->opcode].fixup; - } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) { - lir->flags.size = (offset & 0x2); - lir->flags.fixup = kFixupAlign4; + if (!IsPseudoLirOp(opcode)) { + lir->flags.size = EncodingMap[opcode].size; + lir->flags.fixup = EncodingMap[opcode].fixup; } else { + DCHECK_NE(static_cast<int>(opcode), kPseudoPseudoAlign4); lir->flags.size = 0; lir->flags.fixup = kFixupLabel; } diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 9dfee6ef24..c210816dfd 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -23,27 +23,32 @@ namespace art { +bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, + const InlineMethod& special) { + return Mir2Lir::GenSpecialCase(bb, mir, special); +} + /* * The sparse table in the literal pool is an array of <key,displacement> - * pairs. For each set, we'll load them as a pair using ldmia. - * This means that the register number of the temp we use for the key - * must be lower than the reg for the displacement. - * + * pairs. For each set, we'll load them as a pair using ldp. * The test loop will look something like: * * adr r_base, <table> - * ldr r_val, [rARM_SP, v_reg_off] + * ldr r_val, [rA64_SP, v_reg_off] * mov r_idx, #table_size - * lp: - * ldmia r_base!, {r_key, r_disp} + * loop: + * cbz r_idx, quit + * ldp r_key, r_disp, [r_base], #8 * sub r_idx, #1 * cmp r_val, r_key - * ifeq - * add rARM_PC, r_disp ; This is the branch from which we compute displacement - * cbnz r_idx, lp + * b.ne loop + * adr r_base, #0 ; This is the instruction from which we compute displacements + * add r_base, r_disp + * br r_base + * quit: */ void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, - RegLocation rl_src) { + RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpSparseSwitchTable(table); @@ -60,34 +65,39 @@ void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, // Get the switch value rl_src = LoadValue(rl_src, kCoreReg); RegStorage r_base = AllocTemp(); - /* Allocate key and disp temps */ + // Allocate key and disp temps. RegStorage r_key = AllocTemp(); RegStorage r_disp = AllocTemp(); - // Make sure r_key's register number is less than r_disp's number for ldmia - if (r_key.GetReg() > r_disp.GetReg()) { - RegStorage tmp = r_disp; - r_disp = r_key; - r_key = tmp; - } // Materialize a pointer to the switch table - NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec)); + NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec)); // Set up r_idx RegStorage r_idx = AllocTemp(); LoadConstant(r_idx, size); - // Establish loop branch target - LIR* target = NewLIR0(kPseudoTargetLabel); - // Load next key/disp - NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetRegNum()) | (1 << r_disp.GetRegNum())); + + // Entry of loop. + LIR* loop_entry = NewLIR0(kPseudoTargetLabel); + LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0); + + // Load next key/disp. 
+ NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2); + OpRegRegImm(kOpSub, r_idx, r_idx, 1); + + // Go to next case, if key does not match. OpRegReg(kOpCmp, r_key, rl_src.reg); - // Go if match. NOTE: No instruction set switch here - must stay Thumb2 - LIR* it = OpIT(kCondEq, ""); - LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg()); - OpEndIT(it); - tab_rec->anchor = switch_branch; - // Needs to use setflags encoding here - OpRegRegImm(kOpSub, r_idx, r_idx, 1); // For value == 1, this should set flags. - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - OpCondBranch(kCondNe, target); + OpCondBranch(kCondNe, loop_entry); + + // Key does match: branch to case label. + LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1); + tab_rec->anchor = switch_label; + + // Add displacement to base branch address and go! + OpRegRegRegShift(kOpAdd, r_base.GetReg(), r_base.GetReg(), r_disp.GetReg(), + ENCODE_NO_SHIFT, true); + NewLIR1(kA64Br1x, r_base.GetReg()); + + // Loop exit label. + LIR* loop_exit = NewLIR0(kPseudoTargetLabel); + branch_out->target = loop_exit; } @@ -111,29 +121,35 @@ void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, rl_src = LoadValue(rl_src, kCoreReg); RegStorage table_base = AllocTemp(); // Materialize a pointer to the switch table - NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec)); + NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec)); int low_key = s4FromSwitchData(&table[2]); - RegStorage keyReg; + RegStorage key_reg; // Remove the bias, if necessary if (low_key == 0) { - keyReg = rl_src.reg; + key_reg = rl_src.reg; } else { - keyReg = AllocTemp(); - OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key); + key_reg = AllocTemp(); + OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key); } // Bounds check - if < 0 or >= size continue following switch - OpRegImm(kOpCmp, keyReg, size-1); + OpRegImm(kOpCmp, key_reg, size - 1); LIR* branch_over = OpCondBranch(kCondHi, NULL); // Load the displacement from the switch table RegStorage disp_reg = AllocTemp(); - LoadBaseIndexed(table_base, keyReg, disp_reg, 2, k32); + LoadBaseIndexed(table_base, key_reg, disp_reg, 2, k32); + + // Get base branch address. + RegStorage branch_reg = AllocTemp(); + LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1); + tab_rec->anchor = switch_label; - // ..and go! NOTE: No instruction set switch here - must stay Thumb2 - LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg()); - tab_rec->anchor = switch_branch; + // Add displacement to base branch address and go! 
+ OpRegRegRegShift(kOpAdd, branch_reg.GetReg(), branch_reg.GetReg(), disp_reg.GetReg(), + ENCODE_NO_SHIFT, true); + NewLIR1(kA64Br1x, branch_reg.GetReg()); - /* branch_over target here */ + // branch_over target here LIR* target = NewLIR0(kPseudoTargetLabel); branch_over->target = target; } @@ -163,13 +179,13 @@ void Arm64Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { // Making a call - use explicit registers FlushAllRegs(); /* Everything to home location */ - LoadValueDirectFixed(rl_src, rs_r0); - LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(), - rs_rARM_LR); + LoadValueDirectFixed(rl_src, rs_x0); + LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pHandleFillArrayData), + rs_rA64_LR); // Materialize a pointer to the fill data image - NewLIR3(kThumb2Adr, rs_r1.GetReg(), 0, WrapPointer(tab_rec)); + NewLIR3(kA64Adr2xd, rx1, 0, WrapPointer(tab_rec)); ClobberCallerSave(); - LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR); + LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR); MarkSafepointPC(call_inst); } @@ -180,7 +196,7 @@ void Arm64Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { FlushAllRegs(); // FIXME: need separate LoadValues for object references. - LoadValueDirectFixed(rl_src, rs_r0); // Get obj + LoadValueDirectFixed(rl_src, rs_x0); // Get obj LockCallTemps(); // Prepare for explicit register usage constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. if (kArchVariantHasGoodBranchPredictor) { @@ -190,17 +206,15 @@ void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { } else { // If the null-check fails its handled by the slow-path to reduce exception related meta-data. if (Runtime::Current()->ExplicitNullChecks()) { - null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL); + null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL); } } - Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2); - NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(), - mirror::Object::MonitorOffset().Int32Value() >> 2); + Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2); + NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2); MarkPossibleNullPointerException(opt_flags); - LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL); - NewLIR4(kThumb2Strex, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(), - mirror::Object::MonitorOffset().Int32Value() >> 2); - LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL); + LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_x1, 0, NULL); + NewLIR4(kA64Stxr3wrX, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2); + LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_x1, 0, NULL); LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); @@ -210,9 +224,9 @@ void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { } // TODO: move to a slow path. 
// Go expensive route - artLockObjectFromCode(obj); - LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR); + LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pLockObject), rs_rA64_LR); ClobberCallerSave(); - LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR); + LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR); MarkSafepointPC(call_inst); LIR* success_target = NewLIR0(kPseudoTargetLabel); @@ -220,24 +234,19 @@ void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { GenMemBarrier(kLoadLoad); } else { // Explicit null-check as slow-path is entered using an IT. - GenNullCheck(rs_r0, opt_flags); - Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2); - NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(), - mirror::Object::MonitorOffset().Int32Value() >> 2); + GenNullCheck(rs_x0, opt_flags); + Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2); MarkPossibleNullPointerException(opt_flags); - OpRegImm(kOpCmp, rs_r1, 0); - LIR* it = OpIT(kCondEq, ""); - NewLIR4(kThumb2Strex/*eq*/, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(), - mirror::Object::MonitorOffset().Int32Value() >> 2); - OpEndIT(it); - OpRegImm(kOpCmp, rs_r1, 0); - it = OpIT(kCondNe, "T"); + NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2); + OpRegImm(kOpCmp, rs_x1, 0); + OpIT(kCondEq, ""); + NewLIR4(kA64Stxr3wrX/*eq*/, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2); + OpRegImm(kOpCmp, rs_x1, 0); + OpIT(kCondNe, "T"); // Go expensive route - artLockObjectFromCode(self, obj); - LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), - rs_rARM_LR); + LoadWordDisp/*ne*/(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pLockObject), rs_rA64_LR); ClobberCallerSave(); - LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR); - OpEndIT(it); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR); MarkSafepointPC(call_inst); GenMemBarrier(kLoadLoad); } @@ -250,10 +259,10 @@ void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { */ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { FlushAllRegs(); - LoadValueDirectFixed(rl_src, rs_r0); // Get obj + LoadValueDirectFixed(rl_src, rs_x0); // Get obj LockCallTemps(); // Prepare for explicit register usage LIR* null_check_branch = nullptr; - Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2); + Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2); constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. if (kArchVariantHasGoodBranchPredictor) { if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { @@ -261,14 +270,14 @@ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { } else { // If the null-check fails its handled by the slow-path to reduce exception related meta-data. 
if (Runtime::Current()->ExplicitNullChecks()) { - null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL); + null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL); } } - Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); + Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1); MarkPossibleNullPointerException(opt_flags); - LoadConstantNoClobber(rs_r3, 0); - LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL); - Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3); + LoadConstantNoClobber(rs_x3, 0); + LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_x1, rs_x2, NULL); + Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3); LIR* unlock_success_branch = OpUnconditionalBranch(NULL); LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); @@ -278,9 +287,9 @@ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { } // TODO: move to a slow path. // Go expensive route - artUnlockObjectFromCode(obj); - LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR); + LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pUnlockObject), rs_rA64_LR); ClobberCallerSave(); - LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR); + LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR); MarkSafepointPC(call_inst); LIR* success_target = NewLIR0(kPseudoTargetLabel); @@ -288,33 +297,31 @@ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { GenMemBarrier(kStoreLoad); } else { // Explicit null-check as slow-path is entered using an IT. - GenNullCheck(rs_r0, opt_flags); - Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); // Get lock + GenNullCheck(rs_x0, opt_flags); + Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1); // Get lock MarkPossibleNullPointerException(opt_flags); - Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2); - LoadConstantNoClobber(rs_r3, 0); + Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_x2); + LoadConstantNoClobber(rs_x3, 0); // Is lock unheld on lock or held by us (==thread_id) on unlock? 
- OpRegReg(kOpCmp, rs_r1, rs_r2); - LIR* it = OpIT(kCondEq, "EE"); - Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3); + OpRegReg(kOpCmp, rs_x1, rs_x2); + OpIT(kCondEq, "EE"); + Store32Disp/*eq*/(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3); // Go expensive route - UnlockObjectFromCode(obj); - LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), - rs_rARM_LR); + LoadWordDisp/*ne*/(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pUnlockObject), rs_rA64_LR); ClobberCallerSave(); - LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR); - OpEndIT(it); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR); MarkSafepointPC(call_inst); GenMemBarrier(kStoreLoad); } } void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) { - int ex_offset = Thread::ExceptionOffset<4>().Int32Value(); + int ex_offset = A64_THREAD_EXCEPTION_INT_OFFS; RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage reset_reg = AllocTemp(); - Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg); + Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg); LoadConstant(reset_reg, 0); - Store32Disp(rs_rARM_SELF, ex_offset, reset_reg); + Store32Disp(rs_rA64_SELF, ex_offset, reset_reg); FreeTemp(reset_reg); StoreValue(rl_dest, rl_result); } @@ -326,7 +333,7 @@ void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) { RegStorage reg_card_base = AllocTemp(); RegStorage reg_card_no = AllocTemp(); LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL); - LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base); + LoadWordDisp(rs_rA64_SELF, A64_THREAD_CARD_TABLE_INT_OFFS, reg_card_base); OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift); StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte); LIR* target = NewLIR0(kPseudoTargetLabel); @@ -336,17 +343,16 @@ void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) { } void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { - int spill_count = num_core_spills_ + num_fp_spills_; /* - * On entry, r0, r1, r2 & r3 are live. Let the register allocation + * On entry, x0, x1, x2 & x3 are live. Let the register allocation * mechanism know so it doesn't try to use any of them when * expanding the frame or flushing. This leaves the utility * code with a single temp: r12. This should be enough. */ - LockTemp(rs_r0); - LockTemp(rs_r1); - LockTemp(rs_r2); - LockTemp(rs_r3); + LockTemp(rs_x0); + LockTemp(rs_x1); + LockTemp(rs_x2); + LockTemp(rs_x3); /* * We can safely skip the stack overflow check if we're @@ -356,14 +362,30 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes)); NewLIR0(kPseudoMethodEntry); + if (!skip_overflow_check) { + LoadWordDisp(rs_rA64_SELF, A64_THREAD_STACK_END_INT_OFFS, rs_x12); + OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true); if (Runtime::Current()->ExplicitStackOverflowChecks()) { /* Load stack limit */ - Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12); + // TODO(Arm64): fix the line below: + // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow); + } else { + // Implicit stack overflow check. + // Generate a load from [sp, #-framesize]. If this is in the stack + // redzone we will get a segmentation fault. + // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR? 
+ Load32Disp(rs_rA64_SP, 0, rs_wzr); + MarkPossibleStackOverflowException(); } + } else if (frame_size_ > 0) { + OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true); } + /* Spill core callee saves */ - NewLIR1(kThumb2Push, core_spill_mask_); + if (core_spill_mask_) { + SpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_); + } /* Need to spill any FP regs? */ if (num_fp_spills_) { /* @@ -371,107 +393,40 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) * they are pushed as a contiguous block. When promoting from * the fp set, we must allocate all singles from s16..highest-promoted */ - NewLIR1(kThumb2VPushCS, num_fp_spills_); - } - - const int spill_size = spill_count * 4; - const int frame_size_without_spills = frame_size_ - spill_size; - if (!skip_overflow_check) { - if (Runtime::Current()->ExplicitStackOverflowChecks()) { - class StackOverflowSlowPath : public LIRSlowPath { - public: - StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace) - : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr), - sp_displace_(sp_displace) { - } - void Compile() OVERRIDE { - m2l_->ResetRegPool(); - m2l_->ResetDefTracking(); - GenerateTargetLabel(kPseudoThrowTarget); - if (restore_lr_) { - m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR); - } - m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_); - m2l_->ClobberCallerSave(); - ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow); - // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes - // codegen and target are in thumb2 mode. - // NOTE: native pointer. - m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC); - } - - private: - const bool restore_lr_; - const size_t sp_displace_; - }; - if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) { - OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills); - LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr); - // Need to restore LR since we used it as a temp. - AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size)); - OpRegCopy(rs_rARM_SP, rs_rARM_LR); // Establish stack - } else { - // If the frame is small enough we are guaranteed to have enough space that remains to - // handle signals on the user stack. - OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills); - LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr); - AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_)); - } - } else { - // Implicit stack overflow check. - // Generate a load from [sp, #-overflowsize]. If this is in the stack - // redzone we will get a segmentation fault. - // - // Caveat coder: if someone changes the kStackOverflowReservedBytes value - // we need to make sure that it's loadable in an immediate field of - // a sub instruction. Otherwise we will get a temp allocation and the - // code size will increase. 
- OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes); - Load32Disp(rs_r12, 0, rs_r12); - MarkPossibleStackOverflowException(); - OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); - } - } else { - OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + // TODO(Arm64): SpillFPRegs(rA64_SP, frame_size_, core_spill_mask_); } FlushIns(ArgLocs, rl_method); - FreeTemp(rs_r0); - FreeTemp(rs_r1); - FreeTemp(rs_r2); - FreeTemp(rs_r3); + FreeTemp(rs_x0); + FreeTemp(rs_x1); + FreeTemp(rs_x2); + FreeTemp(rs_x3); } void Arm64Mir2Lir::GenExitSequence() { - int spill_count = num_core_spills_ + num_fp_spills_; /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. */ - LockTemp(rs_r0); - LockTemp(rs_r1); + LockTemp(rs_x0); + LockTemp(rs_x1); NewLIR0(kPseudoMethodExit); - OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4)); /* Need to restore any FP callee saves? */ if (num_fp_spills_) { - NewLIR1(kThumb2VPopCS, num_fp_spills_); + // TODO(Arm64): UnspillFPRegs(num_fp_spills_); } - if (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) { - /* Unspill rARM_LR to rARM_PC */ - core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum()); - core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum()); - } - NewLIR1(kThumb2Pop, core_spill_mask_); - if (!(core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum()))) { - /* We didn't pop to rARM_PC, so must do a bv rARM_LR */ - NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); + if (core_spill_mask_) { + UnSpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_); } + + OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_, /*is_wide*/true); + NewLIR0(kA64Ret); } void Arm64Mir2Lir::GenSpecialExitSequence() { - NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); + NewLIR0(kA64Ret); } } // namespace art diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 3d5e0543c8..903be10144 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -22,7 +22,7 @@ namespace art { -class Arm64Mir2Lir FINAL : public Mir2Lir { +class Arm64Mir2Lir : public Mir2Lir { public: Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -31,7 +31,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { RegLocation rl_dest, int lit); bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE; LIR* CheckSuspendUsingLoad() OVERRIDE; - RegStorage LoadHelper(ThreadOffset<4> offset); + RegStorage LoadHelper(A64ThreadOffset offset); LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size) OVERRIDE; LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, @@ -75,7 +75,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset); int AssignInsnOffsets(); void AssignOffsets(); - static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); + uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); void SetupTargetResourceMasks(LIR* lir, uint64_t flags); const char* GetTargetInstFmt(int opcode); @@ -95,6 +95,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { RegLocation rl_src, int scale, bool card_mark); void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); + void GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenMulLong(Instruction::Code opcode, RegLocation 
rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, @@ -141,6 +142,11 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special); + + uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2); + void UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask); + void SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask); // Required for target - single operation generators. LIR* OpUnconditionalBranch(LIR* target); @@ -156,6 +162,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { LIR* OpReg(OpKind op, RegStorage r_dest_src); void OpRegCopy(RegStorage r_dest, RegStorage r_src); LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src); + LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide); LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value); LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset); LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2); @@ -165,44 +172,50 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value); LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2); LIR* OpTestSuspend(LIR* target); - LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset); + LIR* OpThreadMem(OpKind op, A64ThreadOffset thread_offset); LIR* OpVldm(RegStorage r_base, int count); LIR* OpVstm(RegStorage r_base, int count); void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset); void OpRegCopyWide(RegStorage dest, RegStorage src); - void OpTlsCmp(ThreadOffset<4> offset, int val); + void OpTlsCmp(A64ThreadOffset offset, int val); LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size); LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size); - LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, - int shift); - LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift); - static const ArmEncodingMap EncodingMap[kArmLast]; + LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift, + bool is_wide = false); + LIR* OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, int shift, bool is_wide = false); + static const ArmEncodingMap EncodingMap[kA64Last]; int EncodeShift(int code, int amount); - int ModifiedImmediate(uint32_t value); + int EncodeExtend(int extend_type, int amount); + bool IsExtendEncoding(int encoded_value); + int EncodeLogicalImmediate(bool is_wide, uint64_t value); + uint64_t DecodeLogicalImmediate(bool is_wide, int value); + ArmConditionCode ArmConditionEncoding(ConditionCode code); bool InexpensiveConstantInt(int32_t value); bool InexpensiveConstantFloat(int32_t value); bool InexpensiveConstantLong(int64_t value); bool InexpensiveConstantDouble(int64_t value); + void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); + int LoadArgRegs(CallInfo* info, int call_state, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + private: void 
GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode); - LIR* LoadFPConstantValue(int r_dest, int value); + LIR* LoadFPConstantValue(int r_dest, int32_t value); + LIR* LoadFPConstantValueWide(int r_dest, int64_t value); void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); void AssignDataOffsets(); RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, bool check_zero); RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div); - typedef struct { - OpKind op; - uint32_t shift; - } EasyMultiplyOp; - bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op); - bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops); - void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops); }; } // namespace art diff --git a/compiler/dex/quick/arm64/create.sh b/compiler/dex/quick/arm64/create.sh deleted file mode 100644 index a3833bdc3a..0000000000 --- a/compiler/dex/quick/arm64/create.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -set -e - -if [ ! -d ./arm ]; then - echo "Directory ./arm not found." - exit 1 -fi - -mkdir -p arm64 -dst=`cd arm64 && pwd` -cd arm/ -for f in *; do - cp $f $dst/`echo $f | sed 's/arm/arm64/g'` -done - -sed -i 's,ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_,ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_,g' $dst/arm64_lir.h -sed -i 's,ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_,ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_,g' $dst/codegen_arm64.h -sed -i -e 's,ArmMir2Lir,Arm64Mir2Lir,g' -e 's,arm_lir.h,arm64_lir.h,g' -e 's,codegen_arm.h,codegen_arm64.h,g' $dst/*.h $dst/*.cc diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index 96842836aa..c2a550e0b9 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -21,8 +21,8 @@ namespace art { void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - int op = kThumbBkpt; + RegLocation rl_src1, RegLocation rl_src2) { + int op = kA64Brk1d; RegLocation rl_result; /* @@ -32,24 +32,24 @@ void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest switch (opcode) { case Instruction::ADD_FLOAT_2ADDR: case Instruction::ADD_FLOAT: - op = kThumb2Vadds; + op = kA64Fadd3fff; break; case Instruction::SUB_FLOAT_2ADDR: case Instruction::SUB_FLOAT: - op = kThumb2Vsubs; + op = kA64Fsub3fff; break; case Instruction::DIV_FLOAT_2ADDR: case Instruction::DIV_FLOAT: - op = kThumb2Vdivs; + op = kA64Fdiv3fff; break; case Instruction::MUL_FLOAT_2ADDR: case Instruction::MUL_FLOAT: - op = kThumb2Vmuls; + op = kA64Fmul3fff; break; case Instruction::REM_FLOAT_2ADDR: case Instruction::REM_FLOAT: FlushAllRegs(); // Send everything to home location - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, + CallRuntimeHelperRegLocationRegLocation(A64_QUICK_ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2, false); rl_result = GetReturn(true); StoreValue(rl_dest, rl_result); @@ -68,31 +68,31 @@ void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest } void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, - RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - int op = kThumbBkpt; + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + int op = kA64Brk1d; RegLocation rl_result; switch (opcode) { case 
Instruction::ADD_DOUBLE_2ADDR: case Instruction::ADD_DOUBLE: - op = kThumb2Vaddd; + op = kA64Fadd3fff; break; case Instruction::SUB_DOUBLE_2ADDR: case Instruction::SUB_DOUBLE: - op = kThumb2Vsubd; + op = kA64Fsub3fff; break; case Instruction::DIV_DOUBLE_2ADDR: case Instruction::DIV_DOUBLE: - op = kThumb2Vdivd; + op = kA64Fdiv3fff; break; case Instruction::MUL_DOUBLE_2ADDR: case Instruction::MUL_DOUBLE: - op = kThumb2Vmuld; + op = kA64Fmul3fff; break; case Instruction::REM_DOUBLE_2ADDR: case Instruction::REM_DOUBLE: FlushAllRegs(); // Send everything to home location - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, + CallRuntimeHelperRegLocationRegLocation(A64_QUICK_ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2, false); rl_result = GetReturnWide(true); StoreValueWide(rl_dest, rl_result); @@ -111,98 +111,62 @@ void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, rl_result = EvalLoc(rl_dest, kFPReg, true); DCHECK(rl_dest.wide); DCHECK(rl_result.wide); - NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + NewLIR3(FWIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); StoreValueWide(rl_dest, rl_result); } -void Arm64Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) { - int op = kThumbBkpt; - int src_reg; +void Arm64Mir2Lir::GenConversion(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src) { + int op = kA64Brk1d; RegLocation rl_result; switch (opcode) { case Instruction::INT_TO_FLOAT: - op = kThumb2VcvtIF; + op = kA64Scvtf2fw; break; case Instruction::FLOAT_TO_INT: - op = kThumb2VcvtFI; + op = kA64Fcvtzs2wf; break; case Instruction::DOUBLE_TO_FLOAT: - op = kThumb2VcvtDF; + op = kA64Fcvt2sS; break; case Instruction::FLOAT_TO_DOUBLE: - op = kThumb2VcvtFd; + op = kA64Fcvt2Ss; break; case Instruction::INT_TO_DOUBLE: - op = kThumb2VcvtF64S32; + op = FWIDE(kA64Scvtf2fw); break; case Instruction::DOUBLE_TO_INT: - op = kThumb2VcvtDI; + op = FWIDE(kA64Fcvtzs2wf); + break; + case Instruction::LONG_TO_DOUBLE: + op = FWIDE(kA64Scvtf2fx); break; - case Instruction::LONG_TO_DOUBLE: { - rl_src = LoadValueWide(rl_src, kFPReg); - RegStorage src_low = rl_src.reg.DoubleToLowSingle(); - RegStorage src_high = rl_src.reg.DoubleToHighSingle(); - rl_result = EvalLoc(rl_dest, kFPReg, true); - RegStorage tmp1 = AllocTempDouble(); - RegStorage tmp2 = AllocTempDouble(); - - NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg()); - NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), src_low.GetReg()); - LoadConstantWide(tmp2, 0x41f0000000000000LL); - NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg()); - FreeTemp(tmp1); - FreeTemp(tmp2); - StoreValueWide(rl_dest, rl_result); - return; - } case Instruction::FLOAT_TO_LONG: - GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src); - return; - case Instruction::LONG_TO_FLOAT: { - rl_src = LoadValueWide(rl_src, kFPReg); - RegStorage src_low = rl_src.reg.DoubleToLowSingle(); - RegStorage src_high = rl_src.reg.DoubleToHighSingle(); - rl_result = EvalLoc(rl_dest, kFPReg, true); - // Allocate temp registers. - RegStorage high_val = AllocTempDouble(); - RegStorage low_val = AllocTempDouble(); - RegStorage const_val = AllocTempDouble(); - // Long to double. 
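// Illustrative note (not part of the patch): each A64 conversion opcode chosen above expands to a
// single instruction, which is why the multi-step Thumb2 sequences for the long<->float cases are
// being dropped. Assuming the usual "destination first" operand order:
//   scvtf  s0, w0      // kA64Scvtf2fw        : INT_TO_FLOAT
//   fcvtzs w0, s0      // kA64Fcvtzs2wf       : FLOAT_TO_INT (rounds toward zero)
//   scvtf  d0, x0      // FWIDE(kA64Scvtf2fx) : LONG_TO_DOUBLE
//   fcvt   d0, s0      // kA64Fcvt2Ss         : FLOAT_TO_DOUBLE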
- NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg()); - NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg()); - LoadConstantWide(const_val, INT64_C(0x41f0000000000000)); - NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg()); - // Double to float. - NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg()); - // Free temp registers. - FreeTemp(high_val); - FreeTemp(low_val); - FreeTemp(const_val); - // Store result. - StoreValue(rl_dest, rl_result); - return; - } + op = kA64Fcvtzs2xf; + break; + case Instruction::LONG_TO_FLOAT: + op = kA64Scvtf2fx; + break; case Instruction::DOUBLE_TO_LONG: - GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src); - return; + op = FWIDE(kA64Fcvtzs2xf); + break; default: LOG(FATAL) << "Unexpected opcode: " << opcode; } + if (rl_src.wide) { rl_src = LoadValueWide(rl_src, kFPReg); - src_reg = rl_src.reg.GetReg(); } else { rl_src = LoadValue(rl_src, kFPReg); - src_reg = rl_src.reg.GetReg(); } + + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + if (rl_dest.wide) { - rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(op, rl_result.reg.GetReg(), src_reg); StoreValueWide(rl_dest, rl_result); } else { - rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(op, rl_result.reg.GetReg(), src_reg); StoreValue(rl_dest, rl_result); } } @@ -217,15 +181,14 @@ void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, rl_src2 = mir_graph_->GetSrcWide(mir, 2); rl_src1 = LoadValueWide(rl_src1, kFPReg); rl_src2 = LoadValueWide(rl_src2, kFPReg); - NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); } else { rl_src1 = mir_graph_->GetSrc(mir, 0); rl_src2 = mir_graph_->GetSrc(mir, 1); rl_src1 = LoadValue(rl_src1, kFPReg); rl_src2 = LoadValue(rl_src2, kFPReg); - NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); } - NewLIR0(kThumb2Fmstat); ConditionCode ccode = mir->meta.ccode; switch (ccode) { case kCondEq: @@ -259,7 +222,7 @@ void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { + RegLocation rl_src1, RegLocation rl_src2) { bool is_double = false; int default_result = -1; RegLocation rl_result; @@ -291,7 +254,7 @@ void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, ClobberSReg(rl_dest.s_reg_low); rl_result = EvalLoc(rl_dest, kCoreReg, true); LoadConstant(rl_result.reg, default_result); - NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); } else { rl_src1 = LoadValue(rl_src1, kFPReg); rl_src2 = LoadValue(rl_src2, kFPReg); @@ -299,20 +262,20 @@ void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, ClobberSReg(rl_dest.s_reg_low); rl_result = EvalLoc(rl_dest, kCoreReg, true); LoadConstant(rl_result.reg, default_result); - NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); } DCHECK(!rl_result.reg.IsFloat()); - NewLIR0(kThumb2Fmstat); - - LIR* it = OpIT((default_result == -1) ? 
kCondGt : kCondMi, ""); - NewLIR2(kThumb2MovI8M, rl_result.reg.GetReg(), - ModifiedImmediate(-default_result)); // Must not alter ccodes - OpEndIT(it); - it = OpIT(kCondEq, ""); - LoadConstant(rl_result.reg, 0); - OpEndIT(it); + // TODO(Arm64): should we rather do this? + // csinc wD, wzr, wzr, eq + // csneg wD, wD, wD, le + // (which requires 2 instructions rather than 3) + // Rd = if cond then Rd else -Rd. + NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(), + rl_result.reg.GetReg(), (default_result == 1) ? kArmCondPl : kArmCondLe); + NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rwzr, rl_result.reg.GetReg(), + kArmCondEq); StoreValue(rl_dest, rl_result); } @@ -320,7 +283,7 @@ void Arm64Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) { RegLocation rl_result; rl_src = LoadValue(rl_src, kFPReg); rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(kThumb2Vnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(kA64Fneg2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg()); StoreValue(rl_dest, rl_result); } @@ -328,31 +291,32 @@ void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { RegLocation rl_result; rl_src = LoadValueWide(rl_src, kFPReg); rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(FWIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); StoreValueWide(rl_dest, rl_result); } bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) { - DCHECK_EQ(cu_->instruction_set, kThumb2); + // TODO(Arm64): implement this. + UNIMPLEMENTED(FATAL) << "GenInlinedSqrt not implemented for Arm64"; + + DCHECK_EQ(cu_->instruction_set, kArm64); LIR *branch; RegLocation rl_src = info->args[0]; RegLocation rl_dest = InlineTargetWide(info); // double place for result rl_src = LoadValueWide(rl_src, kFPReg); RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); - NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg()); - NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg()); - NewLIR0(kThumb2Fmstat); - branch = NewLIR2(kThumbBCond, 0, kArmCondEq); + NewLIR2(FWIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(FWIDE(kA64Fcmp2ff), rl_result.reg.GetReg(), rl_result.reg.GetReg()); + branch = NewLIR2(kA64B2ct, kArmCondEq, 0); ClobberCallerSave(); LockCallTemps(); // Using fixed registers - RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt)); - NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg()); - NewLIR1(kThumbBlxR, r_tgt.GetReg()); - NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg()); + RegStorage r_tgt = LoadHelper(A64_QUICK_ENTRYPOINT_OFFSET(pSqrt)); + // NewLIR3(kThumb2Fmrrd, r0, r1, rl_src.reg.GetReg()); + NewLIR1(kA64Blr1x, r_tgt.GetReg()); + // NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), r0, r1); branch->target = NewLIR0(kPseudoTargetLabel); StoreValueWide(rl_dest, rl_result); return true; } - } // namespace art diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index c5a3ab6b39..709f583e3b 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -29,152 +29,55 @@ LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage s return OpCondBranch(cond, target); } -/* - * Generate a Thumb2 IT instruction, which can nullify up to - * four subsequent instructions based on a condition and its - * inverse. 
The condition applies to the first instruction, which - * is executed if the condition is met. The string "guide" consists - * of 0 to 3 chars, and applies to the 2nd through 4th instruction. - * A "T" means the instruction is executed if the condition is - * met, and an "E" means the instruction is executed if the condition - * is not met. - */ +// TODO(Arm64): remove this. LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) { - int mask; - int mask3 = 0; - int mask2 = 0; - int mask1 = 0; - ArmConditionCode code = ArmConditionEncoding(ccode); - int cond_bit = code & 1; - int alt_bit = cond_bit ^ 1; - - // Note: case fallthroughs intentional - switch (strlen(guide)) { - case 3: - mask1 = (guide[2] == 'T') ? cond_bit : alt_bit; - case 2: - mask2 = (guide[1] == 'T') ? cond_bit : alt_bit; - case 1: - mask3 = (guide[0] == 'T') ? cond_bit : alt_bit; - break; - case 0: - break; - default: - LOG(FATAL) << "OAT: bad case in OpIT"; - } - mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) | - (1 << (3 - strlen(guide))); - return NewLIR2(kThumb2It, code, mask); + LOG(FATAL) << "Unexpected use of OpIT for Arm64"; + return NULL; } void Arm64Mir2Lir::OpEndIT(LIR* it) { - // TODO: use the 'it' pointer to do some checks with the LIR, for example - // we could check that the number of instructions matches the mask - // in the IT instruction. - CHECK(it != nullptr); - GenBarrier(); + LOG(FATAL) << "Unexpected use of OpEndIT for Arm64"; } /* * 64-bit 3way compare function. - * mov rX, #-1 - * cmp op1hi, op2hi - * blt done - * bgt flip - * sub rX, op1lo, op2lo (treat as unsigned) - * beq done - * ite hi - * mov(hi) rX, #-1 - * mov(!hi) rX, #1 - * flip: - * neg rX - * done: + * cmp xA, xB + * csinc wC, wzr, wzr, eq + * csneg wC, wC, wC, le */ -void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - LIR* target1; - LIR* target2; +void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + RegLocation rl_result; rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); - RegStorage t_reg = AllocTemp(); - LoadConstant(t_reg, -1); - OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh()); - LIR* branch1 = OpCondBranch(kCondLt, NULL); - LIR* branch2 = OpCondBranch(kCondGt, NULL); - OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow()); - LIR* branch3 = OpCondBranch(kCondEq, NULL); - - LIR* it = OpIT(kCondHi, "E"); - NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1)); - LoadConstant(t_reg, 1); - OpEndIT(it); - - target2 = NewLIR0(kPseudoTargetLabel); - OpRegReg(kOpNeg, t_reg, t_reg); - - target1 = NewLIR0(kPseudoTargetLabel); + rl_result = EvalLoc(rl_dest, kCoreReg, true); - RegLocation rl_temp = LocCReturn(); // Just using as template, will change - rl_temp.reg.SetReg(t_reg.GetReg()); - StoreValue(rl_dest, rl_temp); - FreeTemp(t_reg); - - branch1->target = target1; - branch2->target = target2; - branch3->target = branch1->target; + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); + NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq); + NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(), + rl_result.reg.GetReg(), kArmCondLe); + StoreValue(rl_dest, rl_result); } void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, - int64_t val, ConditionCode ccode) { - int32_t val_lo = Low32Bits(val); - int32_t val_hi = High32Bits(val); - DCHECK_GE(ModifiedImmediate(val_lo), 0); - DCHECK_GE(ModifiedImmediate(val_hi), 
0); + int64_t val, ConditionCode ccode) { LIR* taken = &block_label_list_[bb->taken]; - LIR* not_taken = &block_label_list_[bb->fall_through]; rl_src1 = LoadValueWide(rl_src1, kCoreReg); - RegStorage low_reg = rl_src1.reg.GetLow(); - RegStorage high_reg = rl_src1.reg.GetHigh(); if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { - RegStorage t_reg = AllocTemp(); - NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0); - FreeTemp(t_reg); + ArmOpcode opcode = (ccode == kCondEq) ? kA64Cbz2rt : kA64Cbnz2rt; + LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0); + branch->target = taken; + } else { + OpRegImm64(kOpCmp, rl_src1.reg, val, /*is_wide*/true); OpCondBranch(ccode, taken); - return; - } - - switch (ccode) { - case kCondEq: - case kCondNe: - OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken); - break; - case kCondLt: - OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); - OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); - ccode = kCondUlt; - break; - case kCondLe: - OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); - OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); - ccode = kCondLs; - break; - case kCondGt: - OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); - OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); - ccode = kCondHi; - break; - case kCondGe: - OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); - OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); - ccode = kCondUge; - break; - default: - LOG(FATAL) << "Unexpected ccode: " << ccode; } - OpCmpImmBranch(ccode, low_reg, val_lo, taken); } void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { + // TODO(Arm64): implement this. + UNIMPLEMENTED(FATAL); + RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); RegLocation rl_dest = mir_graph_->GetDest(mir); @@ -194,21 +97,21 @@ void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) { OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val); DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, ""); + OpIT(true_val == 0 ? kCondNe : kCondUge, ""); LoadConstant(rl_result.reg, false_val); - OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact } else if (cheap_false_val && ccode == kCondEq && true_val == 1) { OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1); DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - LIR* it = OpIT(kCondLs, ""); + OpIT(kCondLs, ""); LoadConstant(rl_result.reg, false_val); - OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact } else if (cheap_false_val && InexpensiveConstantInt(true_val)) { OpRegImm(kOpCmp, rl_src.reg, 0); - LIR* it = OpIT(ccode, "E"); + OpIT(ccode, "E"); LoadConstant(rl_result.reg, true_val); LoadConstant(rl_result.reg, false_val); - OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact } else { // Unlikely case - could be tuned. 
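// Reference sketch (illustration only, not emitted code): the conditional-select forms used by
// GenCmpLong above are the A64 replacement for Thumb2 IT blocks. Architecturally,
//   csinc Wd, Wn, Wm, cond   // Wd = cond ? Wn : Wm + 1
//   csneg Wd, Wn, Wm, cond   // Wd = cond ? Wn : -Wm
// so "csinc wC, wzr, wzr, eq" yields 0 when the operands compared equal and 1 otherwise, and the
// following csneg conditionally flips the sign, all without branching. Similarly, the fused
// compare-against-zero branch above needs a single instruction, e.g.
//   cbnz x0, target          // branch if the full 64-bit register is non-zero
// rather than a cmp followed by b.cond.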
RegStorage t_reg1 = AllocTemp(); @@ -216,10 +119,10 @@ void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { LoadConstant(t_reg1, true_val); LoadConstant(t_reg2, false_val); OpRegImm(kOpCmp, rl_src.reg, 0); - LIR* it = OpIT(ccode, "E"); + OpIT(ccode, "E"); OpRegCopy(rl_result.reg, t_reg1); OpRegCopy(rl_result.reg, t_reg2); - OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact } } else { // MOVE case @@ -229,24 +132,26 @@ void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { rl_false = LoadValue(rl_false, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegImm(kOpCmp, rl_src.reg, 0); - LIR* it = nullptr; if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { // Is the "true" case already in place? - it = OpIT(NegateComparison(ccode), ""); + OpIT(NegateComparison(ccode), ""); OpRegCopy(rl_result.reg, rl_false.reg); } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { // False case in place? - it = OpIT(ccode, ""); + OpIT(ccode, ""); OpRegCopy(rl_result.reg, rl_true.reg); } else { // Normal - select between the two. - it = OpIT(ccode, "E"); + OpIT(ccode, "E"); OpRegCopy(rl_result.reg, rl_true.reg); OpRegCopy(rl_result.reg, rl_false.reg); } - OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact } StoreValue(rl_dest, rl_result); } void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { + // TODO(Arm64): implement this. + UNIMPLEMENTED(FATAL); + RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); // Normalize such that if either operand is constant, src2 will be constant. @@ -259,8 +164,8 @@ void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { RegLocation rl_temp = UpdateLocWide(rl_src2); // Do special compare/branch against simple const operand if not already in registers. int64_t val = mir_graph_->ConstantValueWide(rl_src2); - if ((rl_temp.location != kLocPhysReg) && - ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) { + if ((rl_temp.location != kLocPhysReg) + /*&& ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))*/) { GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); return; } @@ -308,56 +213,77 @@ void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { * Generate a register comparison to an immediate and branch. Caller * is responsible for setting branch target field. */ -LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) { +LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, + LIR* target) { LIR* branch; ArmConditionCode arm_cond = ArmConditionEncoding(cond); - /* - * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit - * compare-and-branch if zero is ideal if it will reach. However, because null checks - * branch forward to a slow path, they will frequently not reach - and thus have to - * be converted to a long form during assembly (which will trigger another assembly - * pass). Here we estimate the branch distance for checks, and if large directly - * generate the long form in an attempt to avoid an extra assembly pass. - * TODO: consider interspersing slowpaths in code following unconditional branches. 
- */ - bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget)); - skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64); - if (!skip && reg.Low8() && (check_value == 0) && - ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) { - branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz, - reg.GetReg(), 0); + if (check_value == 0 && (arm_cond == kArmCondEq || arm_cond == kArmCondNe)) { + ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt; + branch = NewLIR2(opcode, reg.GetReg(), 0); } else { OpRegImm(kOpCmp, reg, check_value); - branch = NewLIR2(kThumbBCond, 0, arm_cond); + branch = NewLIR2(kA64B2ct, arm_cond, 0); } branch->target = target; return branch; } LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { + bool dest_is_fp = r_dest.IsFloat(); + bool src_is_fp = r_src.IsFloat(); + ArmOpcode opcode = kA64Brk1d; LIR* res; - int opcode; - // If src or dest is a pair, we'll be using low reg. - if (r_dest.IsPair()) { - r_dest = r_dest.GetLow(); - } - if (r_src.IsPair()) { - r_src = r_src.GetLow(); + + if (LIKELY(dest_is_fp == src_is_fp)) { + if (LIKELY(!dest_is_fp)) { + // Core/core copy. + // Copies involving the sp register require a different instruction. + opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr; + + // TODO(Arm64): kA64Add4RRdT formally has 4 args, but is used as a 2 args instruction. + // This currently works because the other arguments are set to 0 by default. We should + // rather introduce an alias kA64Mov2RR. + + // core/core copy. Do a x/x copy only if both registers are x. + if (r_dest.Is64Bit() && r_src.Is64Bit()) { + opcode = WIDE(opcode); + } + } else { + // Float/float copy. + bool dest_is_double = r_dest.IsDouble(); + bool src_is_double = r_src.IsDouble(); + + // We do not do float/double or double/float casts here. + DCHECK_EQ(dest_is_double, src_is_double); + + // Homogeneous float/float copy. + opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff; + } + } else { + // Inhomogeneous register copy. 
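// For illustration (register numbers arbitrary), the four GPR<->FPR cases selected below map to
// the fmov forms
//   fmov s0, w1   // kA64Fmov2sw : 32-bit core -> single
//   fmov d0, x1   // kA64Fmov2Sx : 64-bit core -> double
//   fmov w0, s1   // kA64Fmov2ws : single -> 32-bit core
//   fmov x0, d1   // kA64Fmov2xS : double -> 64-bit core
// while the sp case above relies on "mov" to/from sp being an alias of "add xd, xn, #0", which is
// why kA64Add4RRdT with the trailing operands defaulted to zero stands in for a plain mov.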
+ if (dest_is_fp) { + if (r_dest.IsDouble()) { + opcode = kA64Fmov2Sx; + } else { + DCHECK(r_src.IsSingle()); + opcode = kA64Fmov2sw; + } + } else { + if (r_src.IsDouble()) { + opcode = kA64Fmov2xS; + } else { + DCHECK(r_dest.Is32Bit()); + opcode = kA64Fmov2ws; + } + } } - if (r_dest.IsFloat() || r_src.IsFloat()) - return OpFpRegCopy(r_dest, r_src); - if (r_dest.Low8() && r_src.Low8()) - opcode = kThumbMovRR; - else if (!r_dest.Low8() && !r_src.Low8()) - opcode = kThumbMovRR_H2H; - else if (r_dest.Low8()) - opcode = kThumbMovRR_H2L; - else - opcode = kThumbMovRR_L2H; + res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg()); + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { res->flags.is_nop = true; } + return res; } @@ -369,33 +295,7 @@ void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) { } void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { - if (r_dest != r_src) { - bool dest_fp = r_dest.IsFloat(); - bool src_fp = r_src.IsFloat(); - DCHECK(r_dest.Is64Bit()); - DCHECK(r_src.Is64Bit()); - if (dest_fp) { - if (src_fp) { - OpRegCopy(r_dest, r_src); - } else { - NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg()); - } - } else { - if (src_fp) { - NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg()); - } else { - // Handle overlap - if (r_src.GetHighReg() == r_dest.GetLowReg()) { - DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg()); - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); - OpRegCopy(r_dest.GetLow(), r_src.GetLow()); - } else { - OpRegCopy(r_dest.GetLow(), r_src.GetLow()); - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); - } - } - } - } + OpRegCopy(r_dest, r_src); } // Table of magic divisors @@ -427,6 +327,12 @@ static const MagicTable magic_table[] = { // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4) bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, RegLocation rl_dest, int lit) { + // TODO(Arm64): fix this for Arm64. Note: may be worth revisiting the magic table. + // It should be possible subtracting one from all its entries, and using smaddl + // to counteract this. The advantage is that integers should then be easier to + // encode as logical immediates (0x55555555 rather than 0x55555556). + UNIMPLEMENTED(FATAL); + if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) { return false; } @@ -434,6 +340,10 @@ bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_d if (pattern == DivideNone) { return false; } + // Tuning: add rem patterns + if (!is_div) { + return false; + } RegStorage r_magic = AllocTemp(); LoadConstant(r_magic, magic_table[lit].magic); @@ -441,182 +351,43 @@ bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_d RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage r_hi = AllocTemp(); RegStorage r_lo = AllocTemp(); - - // rl_dest and rl_src might overlap. - // Reuse r_hi to save the div result for reminder case. - RegStorage r_div_result = is_div ? 
rl_result.reg : r_hi; - - NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg()); + NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr); switch (pattern) { case Divide3: - OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31)); + OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi.GetReg(), + rl_src.reg.GetReg(), EncodeShift(kA64Asr, 31)); break; case Divide5: OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); - OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi, - EncodeShift(kArmAsr, magic_table[lit].shift)); + OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(), + EncodeShift(kA64Asr, magic_table[lit].shift)); break; case Divide7: OpRegReg(kOpAdd, r_hi, rl_src.reg); OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); - OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi, - EncodeShift(kArmAsr, magic_table[lit].shift)); + OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(), + EncodeShift(kA64Asr, magic_table[lit].shift)); break; default: LOG(FATAL) << "Unexpected pattern: " << pattern; } - - if (!is_div) { - // div_result = src / lit - // tmp1 = div_result * lit - // dest = src - tmp1 - RegStorage tmp1 = r_lo; - EasyMultiplyOp ops[2]; - - bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops); - DCHECK_NE(canEasyMultiply, false); - - GenEasyMultiplyTwoOps(tmp1, r_div_result, ops); - OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1); - } - StoreValue(rl_dest, rl_result); return true; } -// Try to convert *lit to 1 RegRegRegShift/RegRegShift form. -bool Arm64Mir2Lir::GetEasyMultiplyOp(int lit, Arm64Mir2Lir::EasyMultiplyOp* op) { - if (IsPowerOfTwo(lit)) { - op->op = kOpLsl; - op->shift = LowestSetBit(lit); - return true; - } - - if (IsPowerOfTwo(lit - 1)) { - op->op = kOpAdd; - op->shift = LowestSetBit(lit - 1); - return true; - } - - if (IsPowerOfTwo(lit + 1)) { - op->op = kOpRsub; - op->shift = LowestSetBit(lit + 1); - return true; - } - - op->op = kOpInvalid; - op->shift = 0; - return false; -} - -// Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms. -bool Arm64Mir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { - GetEasyMultiplyOp(lit, &ops[0]); - if (GetEasyMultiplyOp(lit, &ops[0])) { - ops[1].op = kOpInvalid; - ops[1].shift = 0; - return true; - } - - int lit1 = lit; - uint32_t shift = LowestSetBit(lit1); - if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { - ops[1].op = kOpLsl; - ops[1].shift = shift; - return true; - } - - lit1 = lit - 1; - shift = LowestSetBit(lit1); - if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { - ops[1].op = kOpAdd; - ops[1].shift = shift; - return true; - } - - lit1 = lit + 1; - shift = LowestSetBit(lit1); - if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { - ops[1].op = kOpRsub; - ops[1].shift = shift; - return true; - } - - return false; -} - -// Generate instructions to do multiply. -// Additional temporary register is required, -// if it need to generate 2 instructions and src/dest overlap. 
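// Reference model (illustration only): assuming the standard Hacker's Delight magic constant
// 0x55555556 for division by 3 (the constant referenced in the TODO above), the smaddl plus
// sub-with-asr pattern selected for Divide3 in SmallLiteralDivRem computes the following:
static inline int32_t Div3Reference(int32_t n) {
  int64_t product = INT64_C(0x55555556) * n;         // smaddl xLo, wMagic, wN, xzr
  int32_t hi = static_cast<int32_t>(product >> 32);  // high half of the 64-bit product
  return hi - (n >> 31);                              // sub wQ, wHi, wN, asr #31
}
// For example Div3Reference(7) == 2 and Div3Reference(-7) == -2, matching truncating division.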
-void Arm64Mir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) { - // tmp1 = ( src << shift1) + [ src | -src | 0 ] - // dest = (tmp1 << shift2) + [ src | -src | 0 ] - - RegStorage r_tmp1; - if (ops[1].op == kOpInvalid) { - r_tmp1 = r_dest; - } else if (r_dest.GetReg() != r_src.GetReg()) { - r_tmp1 = r_dest; - } else { - r_tmp1 = AllocTemp(); - } - - switch (ops[0].op) { - case kOpLsl: - OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift); - break; - case kOpAdd: - OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift)); - break; - case kOpRsub: - OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift)); - break; - default: - DCHECK_EQ(ops[0].op, kOpInvalid); - break; - } - - switch (ops[1].op) { - case kOpInvalid: - return; - case kOpLsl: - OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift); - break; - case kOpAdd: - OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift)); - break; - case kOpRsub: - OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift)); - break; - default: - LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps"; - break; - } -} - bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) { - EasyMultiplyOp ops[2]; - - if (!GetEasyMultiplyTwoOps(lit, ops)) { - return false; - } - - rl_src = LoadValue(rl_src, kCoreReg); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - - GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops); - StoreValue(rl_dest, rl_result); - return true; + LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64"; + return false; } RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, bool check_zero) { - LOG(FATAL) << "Unexpected use of GenDivRem for Arm"; + LOG(FATAL) << "Unexpected use of GenDivRem for Arm64"; return rl_dest; } RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) { - LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm"; + LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64"; return rl_dest; } @@ -657,6 +428,9 @@ RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegSto } bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { + // TODO(Arm64): implement this. + UNIMPLEMENTED(FATAL); + DCHECK_EQ(cu_->instruction_set, kThumb2); RegLocation rl_src1 = info->args[0]; RegLocation rl_src2 = info->args[1]; @@ -665,15 +439,18 @@ bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { RegLocation rl_dest = InlineTarget(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E"); + OpIT((is_min) ? kCondGt : kCondLt, "E"); OpRegReg(kOpMov, rl_result.reg, rl_src2.reg); OpRegReg(kOpMov, rl_result.reg, rl_src1.reg); - OpEndIT(it); + GenBarrier(); StoreValue(rl_dest, rl_result); return true; } bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { + // TODO(Arm64): implement this. + UNIMPLEMENTED(WARNING); + RegLocation rl_src_address = info->args[0]; // long address rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] RegLocation rl_dest = InlineTarget(info); @@ -682,23 +459,26 @@ bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { if (size == k64) { // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0. 
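// Ordering note (illustration only): when the address register doubles as the low half of the
// 64-bit result, the high word must be loaded first. Otherwise the sequence
//   ldr w0, [x0]       // clobbers the base address held in x0...
//   ldr w1, [x0, #4]   // ...so this second load would use the wrong base
// would read the high word from the wrong location; that is what the register check below guards
// against by swapping the two loads.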
if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) { - Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow()); - Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh()); + LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow()); + LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh()); } else { - Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh()); - Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow()); + LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh()); + LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow()); } StoreValueWide(rl_dest, rl_result); } else { DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. - LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG); + LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size); StoreValue(rl_dest, rl_result); } return true; } bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { + // TODO(Arm64): implement this. + UNIMPLEMENTED(WARNING); + RegLocation rl_src_address = info->args[0]; // long address rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] RegLocation rl_src_value = info->args[2]; // [size] value @@ -718,14 +498,17 @@ bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { } void Arm64Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) { - LOG(FATAL) << "Unexpected use of OpLea for Arm"; + LOG(FATAL) << "Unexpected use of OpLea for Arm64"; } -void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) { - LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm"; +void Arm64Mir2Lir::OpTlsCmp(A64ThreadOffset offset, int val) { + LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm64"; } bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { + // TODO(Arm64): implement this. + UNIMPLEMENTED(WARNING); + DCHECK_EQ(cu_->instruction_set, kThumb2); // Unused - RegLocation rl_src_unsafe = info->args[0]; RegLocation rl_src_obj = info->args[1]; // Object - known non-null @@ -745,10 +528,10 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // around the potentially locked temp by using LR for r_ptr, unconditionally. // TODO: Pass information about the need for more temps to the stack frame generation // code so that we can rely on being able to allocate enough temps. - DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp()); - MarkTemp(rs_rARM_LR); - FreeTemp(rs_rARM_LR); - LockTemp(rs_rARM_LR); + DCHECK(!GetRegInfo(rs_rA64_LR)->IsTemp()); + MarkTemp(rs_rA64_LR); + FreeTemp(rs_rA64_LR); + LockTemp(rs_rA64_LR); bool load_early = true; if (is_long) { RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() : @@ -797,7 +580,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); - RegStorage r_ptr = rs_rARM_LR; + RegStorage r_ptr = rs_rA64_LR; OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg); // Free now unneeded rl_object and rl_offset to give more temps. @@ -813,9 +596,9 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { rl_expected = LoadValueWide(rl_src_expected, kCoreReg); } else { // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs. 
- RegStorage low_reg = AllocTemp(); - RegStorage high_reg = AllocTemp(); - rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg); + int low_reg = AllocTemp().GetReg(); + int high_reg = AllocTemp().GetReg(); + rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg); rl_expected = rl_new_value; } @@ -827,42 +610,37 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegStorage r_tmp = AllocTemp(); LIR* target = NewLIR0(kPseudoTargetLabel); - LIR* it = nullptr; if (is_long) { RegStorage r_tmp_high = AllocTemp(); if (!load_early) { LoadValueDirectWide(rl_src_expected, rl_expected.reg); } - NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg()); + NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg()); OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow()); OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh()); if (!load_early) { LoadValueDirectWide(rl_src_new_value, rl_new_value.reg); } - // Make sure we use ORR that sets the ccode - if (r_tmp.Low8() && r_tmp_high.Low8()) { - NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg()); - } else { - NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0); - } - FreeTemp(r_tmp_high); // Now unneeded - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - it = OpIT(kCondEq, "T"); - NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg()); + LIR* branch1 = OpCmpImmBranch(kCondNe, r_tmp, 0, NULL); + LIR* branch2 = OpCmpImmBranch(kCondNe, r_tmp_high, 0, NULL); + NewLIR4(WIDE(kA64Stxr3wrX) /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), + rl_new_value.reg.GetHighReg(), r_ptr.GetReg()); + LIR* target2 = NewLIR0(kPseudoTargetLabel); + branch1->target = target2; + branch2->target = target2; + FreeTemp(r_tmp_high); // Now unneeded } else { - NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0); + NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0); OpRegReg(kOpSub, r_tmp, rl_expected.reg); DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - it = OpIT(kCondEq, "T"); - NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); + OpIT(kCondEq, "T"); + NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); } // Still one conditional left from OpIT(kCondEq, "T") from either branch OpRegImm(kOpCmp /* eq */, r_tmp, 1); - OpEndIT(it); - OpCondBranch(kCondEq, target); if (!load_early) { @@ -873,36 +651,37 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1); DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - it = OpIT(kCondUlt, ""); + OpIT(kCondUlt, ""); LoadConstant(rl_result.reg, 0); /* cc */ FreeTemp(r_tmp); // Now unneeded. - OpEndIT(it); // Barrier to terminate OpIT. StoreValue(rl_dest, rl_result); // Now, restore lr to its non-temp status. 
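// For reference (a sketch, not the exact code emitted above): an AArch64 compare-and-swap built
// from exclusive monitors has the shape
//   retry:
//     ldxr w2, [x0]          // load-exclusive the current value
//     cmp  w2, w_expected
//     b.ne done              // mismatch: fail without storing
//     stxr w3, w_new, [x0]   // store-exclusive; w3 == 0 on success, 1 if the monitor was lost
//     cbnz w3, retry         // monitor lost: retry
//   done:
// The wide path above builds this structure out of kA64Ldxr2rX/kA64Stxr3wrX and explicit
// compare-and-branch targets, since there is no IT shadow to lean on.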
- Clobber(rs_rARM_LR); - UnmarkTemp(rs_rARM_LR); + Clobber(rs_rA64_LR); + UnmarkTemp(rs_rA64_LR); return true; } LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { - return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target); + return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target); } LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) { - return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count); + LOG(FATAL) << "Unexpected use of OpVldm for Arm64"; + return NULL; } LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) { - return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count); + LOG(FATAL) << "Unexpected use of OpVstm for Arm64"; + return NULL; } void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit) { - OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, - EncodeShift(kArmLsl, second_bit - first_bit)); + OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(), + EncodeShift(kA64Lsl, second_bit - first_bit)); if (first_bit != 0) { OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit); } @@ -910,15 +689,14 @@ void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) { DCHECK(reg.IsPair()); // TODO: support k64BitSolo. - RegStorage t_reg = AllocTemp(); - NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0); - FreeTemp(t_reg); + OpRegImm64(kOpCmp, reg, 0, /*is_wide*/true); GenDivZeroCheck(kCondEq); } +// TODO(Arm64): the function below should go. // Test suspend flag, return target of taken suspend branch LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) { - NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1); + NewLIR3(kA64Subs3rRd, rA64_SUSPEND, rA64_SUSPEND, 1); return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target); } @@ -950,8 +728,8 @@ void Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { // If the same barrier already exists, don't generate another. if (barrier == nullptr - || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) { - barrier = NewLIR1(kThumb2Dmb, dmb_flavor); + || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) { + barrier = NewLIR1(kA64Dmb1B, dmb_flavor); } // At this point we must have a memory barrier. Mark it as a scheduling barrier as well. @@ -979,136 +757,45 @@ void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { StoreValueWide(rl_dest, rl_result); } -void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_src2) { - /* - * tmp1 = src1.hi * src2.lo; // src1.hi is no longer needed - * dest = src1.lo * src2.lo; - * tmp1 += src1.lo * src2.hi; - * dest.hi += tmp1; - * - * To pull off inline multiply, we have a worst-case requirement of 7 temporary - * registers. Normally for Arm, we get 5. We can get to 6 by including - * lr in the temp set. The only problematic case is all operands and result are - * distinct, and none have been promoted. In that case, we can succeed by aggressively - * freeing operand temp registers after they are no longer needed. All other cases - * can proceed normally. We'll just punt on the case of the result having a misaligned - * overlap with either operand and send that case to a runtime handler. 
- */ - RegLocation rl_result; - if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) { - ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul); - FlushAllRegs(); - CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false); - rl_result = GetReturnWide(false); - StoreValueWide(rl_dest, rl_result); - return; - } - - rl_src1 = LoadValueWide(rl_src1, kCoreReg); - rl_src2 = LoadValueWide(rl_src2, kCoreReg); - - int reg_status = 0; - RegStorage res_lo; - RegStorage res_hi; - bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() && - !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh()); - bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh()); - bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh()); - // Check if rl_dest is *not* either operand and we have enough temp registers. - if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) && - (dest_promoted || src1_promoted || src2_promoted)) { - // In this case, we do not need to manually allocate temp registers for result. - rl_result = EvalLoc(rl_dest, kCoreReg, true); - res_lo = rl_result.reg.GetLow(); - res_hi = rl_result.reg.GetHigh(); - } else { - res_lo = AllocTemp(); - if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) { - // In this case, we have enough temp registers to be allocated for result. - res_hi = AllocTemp(); - reg_status = 1; - } else { - // In this case, all temps are now allocated. - // res_hi will be allocated after we can free src1_hi. - reg_status = 2; - } - } - - // Temporarily add LR to the temp pool, and assign it to tmp1 - MarkTemp(rs_rARM_LR); - FreeTemp(rs_rARM_LR); - RegStorage tmp1 = rs_rARM_LR; - LockTemp(rs_rARM_LR); - - if (rl_src1.reg == rl_src2.reg) { - DCHECK(res_hi.Valid()); - DCHECK(res_lo.Valid()); - NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()); - NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(), - rl_src1.reg.GetLowReg()); - OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1)); - } else { - NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg()); - if (reg_status == 2) { - DCHECK(!res_hi.Valid()); - DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg()); - DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg()); - FreeTemp(rl_src1.reg.GetHigh()); - res_hi = AllocTemp(); - } - DCHECK(res_hi.Valid()); - DCHECK(res_lo.Valid()); - NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(), - rl_src1.reg.GetLowReg()); - NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(), - tmp1.GetReg()); - NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0); - if (reg_status == 2) { - // Clobber rl_src1 since it was corrupted. - FreeTemp(rl_src1.reg); - Clobber(rl_src1.reg); - } - } - - // Now, restore lr to its non-temp status. - FreeTemp(tmp1); - Clobber(rs_rARM_LR); - UnmarkTemp(rs_rARM_LR); - - if (reg_status != 0) { - // We had manually allocated registers for rl_result. - // Now construct a RegLocation. - rl_result = GetReturnWide(false); // Just using as a template. 
- rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi); - } +void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + RegLocation rl_result; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegRegRegShift(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), + ENCODE_NO_SHIFT, /*is_wide*/ true); + StoreValueWide(rl_dest, rl_result); +} - StoreValueWide(rl_dest, rl_result); +void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) { + GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2); } void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, - RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenAddLong for Arm"; + RegLocation rl_src2) { + GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2); } void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenSubLong for Arm"; + GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2); } void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenAndLong for Arm"; + GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2); } void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of GenOrLong for Arm"; + GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2); } void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { - LOG(FATAL) << "Unexpected use of genXoLong for Arm"; + GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2); } /* @@ -1116,6 +803,9 @@ void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, Reg */ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale) { + // TODO(Arm64): check this. + UNIMPLEMENTED(WARNING); + RegisterClass reg_class = RegClassBySize(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; @@ -1157,7 +847,8 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } else { // No special indexed operation, lea + load w/ displacement reg_ptr = AllocTemp(); - OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale)); + OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(), + EncodeShift(kA64Lsl, scale)); FreeTemp(rl_index.reg); } rl_result = EvalLoc(rl_dest, reg_class, true); @@ -1170,7 +861,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } FreeTemp(reg_len); } - LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG); + LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size); MarkPossibleNullPointerException(opt_flags); if (!constant_index) { FreeTemp(reg_ptr); @@ -1204,6 +895,9 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, */ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { + // TODO(Arm64): check this. 
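// Note (illustrative): with 64-bit general-purpose registers every long ALU operation routed
// through GenLongOp above becomes a single instruction on the X registers, e.g.
//   mul x0, x1, x2   // LONG mul: replaces the removed umull/mla pair-register sequence
//   add x0, x1, x2   // LONG add
//   eor x0, x1, x2   // LONG xor
// so the register-pair bookkeeping (and the pLmul runtime fallback) is no longer needed.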
+ UNIMPLEMENTED(WARNING); + RegisterClass reg_class = RegClassBySize(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); bool constant_index = rl_index.is_const; @@ -1259,7 +953,8 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, rl_src = LoadValue(rl_src, reg_class); } if (!constant_index) { - OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale)); + OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(), + EncodeShift(kA64Lsl, scale)); } if (needs_range_check) { if (constant_index) { @@ -1294,6 +989,9 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) { + // TODO(Arm64): check this. + UNIMPLEMENTED(WARNING); + rl_src = LoadValueWide(rl_src, kCoreReg); // Per spec, we only care about low 6 bits of shift amount. int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; @@ -1320,8 +1018,8 @@ void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, LoadConstant(rl_result.reg.GetLow(), 0); } else { OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); - OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(), - EncodeShift(kArmLsr, 32 - shift_amount)); + OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetLowReg(), + EncodeShift(kA64Lsr, 32 - shift_amount)); OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount); } break; @@ -1336,8 +1034,8 @@ void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, } else { RegStorage t_reg = AllocTemp(); OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount); - OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(), - EncodeShift(kArmLsl, 32 - shift_amount)); + OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(), + EncodeShift(kA64Lsl, 32 - shift_amount)); FreeTemp(t_reg); OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); } @@ -1353,8 +1051,8 @@ void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, } else { RegStorage t_reg = AllocTemp(); OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount); - OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(), - EncodeShift(kArmLsl, 32 - shift_amount)); + OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(), + EncodeShift(kA64Lsl, 32 - shift_amount)); FreeTemp(t_reg); OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); } @@ -1365,8 +1063,11 @@ void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, StoreValueWide(rl_dest, rl_result); } -void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, - RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { +void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) { + // TODO(Arm64): implement this. + UNIMPLEMENTED(WARNING); + if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) { if (!rl_src2.is_const) { // Don't bother with special handling for subtract from immediate. 
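// Illustration: the array pointer computation used by GenArrayGet/GenArrayPut above, a kOpAdd
// with EncodeShift(kA64Lsl, scale), corresponds to a single shifted-register add such as
//   add x3, x1, x2, lsl #2   // base + (index << 2) for 4-byte elements (registers arbitrary)
// after which the element itself is accessed at data_offset from reg_ptr.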
@@ -1385,11 +1086,10 @@ void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, return; } DCHECK(rl_src2.is_const); - int64_t val = mir_graph_->ConstantValueWide(rl_src2); - uint32_t val_lo = Low32Bits(val); - uint32_t val_hi = High32Bits(val); - int32_t mod_imm_lo = ModifiedImmediate(val_lo); - int32_t mod_imm_hi = ModifiedImmediate(val_hi); + // TODO(Arm64): implement this. + // int64_t val = mir_graph_->ConstantValueWide(rl_src2); + int32_t mod_imm_lo = -1; // ModifiedImmediate(val_lo); + int32_t mod_imm_hi = -1; // ModifiedImmediate(val_hi); // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit switch (opcode) { @@ -1409,6 +1109,7 @@ void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); // NOTE: once we've done the EvalLoc on dest, we can no longer bail. switch (opcode) { +#if 0 case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo); @@ -1442,10 +1143,82 @@ void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo); NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi); break; +#endif default: LOG(FATAL) << "Unexpected opcode " << opcode; } StoreValueWide(rl_dest, rl_result); } +/** + * @brief Split a register list in pairs or registers. + * + * Given a list of registers in @p reg_mask, split the list in pairs. Use as follows: + * @code + * int reg1 = -1, reg2 = -1; + * while (reg_mask) { + * reg_mask = GenPairWise(reg_mask, & reg1, & reg2); + * if (UNLIKELY(reg2 < 0)) { + * // Single register in reg1. + * } else { + * // Pair in reg1, reg2. + * } + * } + * @endcode + */ +uint32_t Arm64Mir2Lir::GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) { + // Find first register. + int first_bit_set = __builtin_ctz(reg_mask) + 1; + int reg = *reg1 + first_bit_set; + reg_mask >>= first_bit_set; + + if (LIKELY(reg_mask)) { + // Save the first register, find the second and use the pair opcode. + int second_bit_set = __builtin_ctz(reg_mask) + 1; + *reg2 = reg; + reg_mask >>= second_bit_set; + *reg1 = reg + second_bit_set; + return reg_mask; + } + + // Use the single opcode, as we just have one register. + *reg1 = reg; + *reg2 = -1; + return reg_mask; +} + +void Arm64Mir2Lir::UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) { + int reg1 = -1, reg2 = -1; + const int pop_log2_size = 3; + + for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) { + reg_mask = GenPairWise(reg_mask, & reg1, & reg2); + if (UNLIKELY(reg2 < 0)) { + // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly. + NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset); + } else { + // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly. + NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo32(reg1).GetReg(), + RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset); + } + } +} + +void Arm64Mir2Lir::SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) { + int reg1 = -1, reg2 = -1; + const int pop_log2_size = 3; + + for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) { + reg_mask = GenPairWise(reg_mask, & reg1, & reg2); + if (UNLIKELY(reg2 < 0)) { + // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly. 
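// Worked example (illustrative): with reg_mask covering registers 19, 20 and 22, and reg1/reg2
// initialised to -1 as in the loops here, the first GenPairWise call returns the pair
// reg1 == 20, reg2 == 19 (emitted as one stp/ldp), and the second call returns the single
// reg1 == 22, reg2 == -1 (emitted as one str/ldr). kA64Stp4rrXD/kA64Ldp4rrXD are the
// store-pair/load-pair forms, so callee saves move two at a time whenever possible.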
+ NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset); + } else { + // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly. + NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo32(reg1).GetReg(), + RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset); + } + } +} + } // namespace art diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 233e9c2fb0..7e07e15196 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -27,39 +27,40 @@ namespace art { // TODO: rework this when c++11 support allows. static const RegStorage core_regs_arr[] = - {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF, - rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; + {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7, + rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, + rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23, + rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31}; static const RegStorage sp_regs_arr[] = - {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, - rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20, - rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30, - rs_fr31}; + {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7, + rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15, + rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23, + rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31}; static const RegStorage dp_regs_arr[] = - {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10, - rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15}; + {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7, + rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15}; static const RegStorage reserved_regs_arr[] = - {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; -static const RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12}; + {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR}; +static const RegStorage core_temps_arr[] = + {rs_x0, rs_x1, rs_x2, rs_x3, rs_x12}; static const RegStorage sp_temps_arr[] = - {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, - rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15}; + {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7, + rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15}; static const RegStorage dp_temps_arr[] = - {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7}; + {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7}; static const std::vector<RegStorage> core_regs(core_regs_arr, - core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0])); + core_regs_arr + arraysize(core_regs_arr)); static const std::vector<RegStorage> sp_regs(sp_regs_arr, - sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0])); + sp_regs_arr + arraysize(sp_regs_arr)); static const std::vector<RegStorage> dp_regs(dp_regs_arr, - dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0])); + dp_regs_arr + arraysize(dp_regs_arr)); static const std::vector<RegStorage> reserved_regs(reserved_regs_arr, - reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0])); + reserved_regs_arr + arraysize(reserved_regs_arr)); static const std::vector<RegStorage> 
core_temps(core_temps_arr, - core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0])); -static const std::vector<RegStorage> sp_temps(sp_temps_arr, - sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0])); -static const std::vector<RegStorage> dp_temps(dp_temps_arr, - dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0])); + core_temps_arr + arraysize(core_temps_arr)); +static const std::vector<RegStorage> sp_temps(sp_temps_arr, sp_temps_arr + arraysize(sp_temps_arr)); +static const std::vector<RegStorage> dp_temps(dp_temps_arr, dp_temps_arr + arraysize(dp_temps_arr)); RegLocation Arm64Mir2Lir::LocCReturn() { return arm_loc_c_return; @@ -79,25 +80,26 @@ RegLocation Arm64Mir2Lir::LocCReturnDouble() { // Return a target-dependent special register. RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { + // TODO(Arm64): this function doesn't work for hard-float ABI. RegStorage res_reg = RegStorage::InvalidReg(); switch (reg) { - case kSelf: res_reg = rs_rARM_SELF; break; - case kSuspend: res_reg = rs_rARM_SUSPEND; break; - case kLr: res_reg = rs_rARM_LR; break; - case kPc: res_reg = rs_rARM_PC; break; - case kSp: res_reg = rs_rARM_SP; break; - case kArg0: res_reg = rs_r0; break; - case kArg1: res_reg = rs_r1; break; - case kArg2: res_reg = rs_r2; break; - case kArg3: res_reg = rs_r3; break; - case kFArg0: res_reg = rs_r0; break; - case kFArg1: res_reg = rs_r1; break; - case kFArg2: res_reg = rs_r2; break; - case kFArg3: res_reg = rs_r3; break; - case kRet0: res_reg = rs_r0; break; - case kRet1: res_reg = rs_r1; break; - case kInvokeTgt: res_reg = rs_rARM_LR; break; - case kHiddenArg: res_reg = rs_r12; break; + case kSelf: res_reg = rs_rA64_SELF; break; + case kSuspend: res_reg = rs_rA64_SUSPEND; break; + case kLr: res_reg = rs_rA64_LR; break; + case kPc: res_reg = RegStorage::InvalidReg(); break; + case kSp: res_reg = rs_rA64_SP; break; + case kArg0: res_reg = rs_x0; break; + case kArg1: res_reg = rs_x1; break; + case kArg2: res_reg = rs_x2; break; + case kArg3: res_reg = rs_x3; break; + case kFArg0: res_reg = rs_f0; break; + case kFArg1: res_reg = rs_f1; break; + case kFArg2: res_reg = rs_f2; break; + case kFArg3: res_reg = rs_f3; break; + case kRet0: res_reg = rs_x0; break; + case kRet1: res_reg = rs_x0; break; + case kInvokeTgt: res_reg = rs_rA64_LR; break; + case kHiddenArg: res_reg = rs_x12; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; case kCount: res_reg = RegStorage::InvalidReg(); break; } @@ -105,55 +107,37 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { } RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - // For the 32-bit internal ABI, the first 3 arguments are passed in registers. - switch (arg_num) { - case 0: - return rs_r1; - case 1: - return rs_r2; - case 2: - return rs_r3; - default: - return RegStorage::InvalidReg(); - } + return RegStorage::InvalidReg(); } /* - * Decode the register id. + * Decode the register id. This routine makes assumptions on the encoding made by RegStorage. */ uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) { - uint64_t seed; - int shift; - int reg_id = reg.GetRegNum(); - /* Each double register is equal to a pair of single-precision FP registers */ - if (reg.IsDouble()) { - seed = 0x3; - reg_id = reg_id << 1; - } else { - seed = 1; + // TODO(Arm64): this function depends too much on the internal RegStorage encoding. Refactor. + + int reg_raw = reg.GetRawBits(); + // Check if the shape mask is zero (i.e. invalid). 
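// Editor's note (not part of this patch): the check below is about the A64
// zero registers rather than an invalid shape mask. Register encoding 31
// reads as wzr/xzr in data-processing instructions (reads yield zero, writes
// are discarded), so it contributes nothing to the use/def resource masks and
// an empty mask is returned for it.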
+ if (UNLIKELY(reg == rs_wzr || reg == rs_xzr)) { + // The zero register is not a true register. It is just an immediate zero. + return 0; } - /* FP register starts at bit position 16 */ - shift = reg.IsFloat() ? kArmFPReg0 : 0; - /* Expand the double register id into single offset */ - shift += reg_id; - return (seed << shift); + + return UINT64_C(1) << (reg_raw & RegStorage::kRegTypeMask); } uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() { - return ENCODE_ARM_REG_PC; + LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for Arm64"; + return 0ULL; } -// Thumb2 specific setup. TODO: inline?: +// Arm64 specific setup. TODO: inline?: void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { - DCHECK_EQ(cu_->instruction_set, kThumb2); + DCHECK_EQ(cu_->instruction_set, kArm64); DCHECK(!lir->flags.use_def_invalid); - int opcode = lir->opcode; - // These flags are somewhat uncommon - bypass if we can. - if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 | - REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 | - REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) { + if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) { if (flags & REG_DEF_SP) { lir->u.m.def_mask |= ENCODE_ARM_REG_SP; } @@ -162,61 +146,6 @@ void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { lir->u.m.use_mask |= ENCODE_ARM_REG_SP; } - if (flags & REG_DEF_LIST0) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); - } - - if (flags & REG_DEF_LIST1) { - lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); - } - - if (flags & REG_DEF_FPCS_LIST0) { - lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); - } - - if (flags & REG_DEF_FPCS_LIST2) { - for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i); - } - } - - if (flags & REG_USE_PC) { - lir->u.m.use_mask |= ENCODE_ARM_REG_PC; - } - - /* Conservatively treat the IT block */ - if (flags & IS_IT) { - lir->u.m.def_mask = ENCODE_ALL; - } - - if (flags & REG_USE_LIST0) { - lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); - } - - if (flags & REG_USE_LIST1) { - lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); - } - - if (flags & REG_USE_FPCS_LIST0) { - lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); - } - - if (flags & REG_USE_FPCS_LIST2) { - for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i); - } - } - /* Fixup for kThumbPush/lr and kThumbPop/pc */ - if (opcode == kThumbPush || opcode == kThumbPop) { - uint64_t r8Mask = GetRegMaskCommon(rs_r8); - if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) { - lir->u.m.use_mask &= ~r8Mask; - lir->u.m.use_mask |= ENCODE_ARM_REG_LR; - } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) { - lir->u.m.def_mask &= ~r8Mask; - lir->u.m.def_mask |= ENCODE_ARM_REG_PC; - } - } if (flags & REG_DEF_LR) { lir->u.m.def_mask |= ENCODE_ARM_REG_LR; } @@ -251,92 +180,128 @@ ArmConditionCode Arm64Mir2Lir::ArmConditionEncoding(ConditionCode ccode) { return res; } -static const char* core_reg_names[16] = { - "r0", - "r1", - "r2", - "r3", - "r4", - "r5", - "r6", - "r7", - "r8", - "rSELF", - "r10", - "r11", - "r12", - "sp", - "lr", - "pc", -}; - - -static const char* shift_names[4] = { +static const char *shift_names[4] = { "lsl", "lsr", "asr", - "ror"}; + "ror" +}; -/* Decode and print a ARM register name */ -static char* DecodeRegList(int opcode, int vector, char* buf, 
size_t buf_size) { - int i; - bool printed = false; - buf[0] = 0; - for (i = 0; i < 16; i++, vector >>= 1) { - if (vector & 0x1) { - int reg_id = i; - if (opcode == kThumbPush && i == 8) { - reg_id = rs_rARM_LR.GetRegNum(); - } else if (opcode == kThumbPop && i == 8) { - reg_id = rs_rARM_PC.GetRegNum(); - } - if (printed) { - snprintf(buf + strlen(buf), buf_size - strlen(buf), ", r%d", reg_id); - } else { - printed = true; - snprintf(buf, buf_size, "r%d", reg_id); - } +static const char* extend_names[8] = { + "uxtb", + "uxth", + "uxtw", + "uxtx", + "sxtb", + "sxth", + "sxtw", + "sxtx", +}; + +/* Decode and print a register extension (e.g. ", uxtb #1") */ +static void DecodeRegExtendOrShift(int operand, char *buf, size_t buf_size) { + if ((operand & (1 << 6)) == 0) { + const char *shift_name = shift_names[(operand >> 7) & 0x3]; + int amount = operand & 0x3f; + snprintf(buf, buf_size, ", %s #%d", shift_name, amount); + } else { + const char *extend_name = extend_names[(operand >> 3) & 0x7]; + int amount = operand & 0x7; + if (amount == 0) { + snprintf(buf, buf_size, ", %s", extend_name); + } else { + snprintf(buf, buf_size, ", %s #%d", extend_name, amount); } } - return buf; } -static char* DecodeFPCSRegList(int count, int base, char* buf, size_t buf_size) { - snprintf(buf, buf_size, "s%d", base); - for (int i = 1; i < count; i++) { - snprintf(buf + strlen(buf), buf_size - strlen(buf), ", s%d", base + i); +#define BIT_MASK(w) ((UINT64_C(1) << (w)) - UINT64_C(1)) + +static uint64_t RotateRight(uint64_t value, unsigned rotate, unsigned width) { + DCHECK_LE(width, 64U); + rotate &= 63; + value = value & BIT_MASK(width); + return ((value & BIT_MASK(rotate)) << (width - rotate)) | (value >> rotate); +} + +static uint64_t RepeatBitsAcrossReg(bool is_wide, uint64_t value, unsigned width) { + unsigned i; + unsigned reg_size = (is_wide) ? 64 : 32; + uint64_t result = value & BIT_MASK(width); + DCHECK_NE(width, reg_size); + for (i = width; i < reg_size; i *= 2) { + result |= (result << i); } - return buf; + DCHECK_EQ(i, reg_size); + return result; } -static int32_t ExpandImmediate(int value) { - int32_t mode = (value & 0xf00) >> 8; - uint32_t bits = value & 0xff; - switch (mode) { - case 0: - return bits; - case 1: - return (bits << 16) | bits; - case 2: - return (bits << 24) | (bits << 8); - case 3: - return (bits << 24) | (bits << 16) | (bits << 8) | bits; - default: - break; +/** + * @brief Decode an immediate in the form required by logical instructions. + * + * @param is_wide Whether @p value encodes a 64-bit (as opposed to 32-bit) immediate. + * @param value The encoded logical immediates that is to be decoded. + * @return The decoded logical immediate. + * @note This is the inverse of Arm64Mir2Lir::EncodeLogicalImmediate(). + */ +uint64_t Arm64Mir2Lir::DecodeLogicalImmediate(bool is_wide, int value) { + unsigned n = (value >> 12) & 0x01; + unsigned imm_r = (value >> 6) & 0x3f; + unsigned imm_s = (value >> 0) & 0x3f; + + // An integer is constructed from the n, imm_s and imm_r bits according to + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 + // bits are set. 
The pattern is rotated right by R, and repeated across a + // 32 or 64-bit value, depending on destination register width. + + if (n == 1) { + DCHECK_NE(imm_s, 0x3fU); + uint64_t bits = BIT_MASK(imm_s + 1); + return RotateRight(bits, imm_r, 64); + } else { + DCHECK_NE((imm_s >> 1), 0x1fU); + for (unsigned width = 0x20; width >= 0x2; width >>= 1) { + if ((imm_s & width) == 0) { + unsigned mask = (unsigned)(width - 1); + DCHECK_NE((imm_s & mask), mask); + uint64_t bits = BIT_MASK((imm_s & mask) + 1); + return RepeatBitsAcrossReg(is_wide, RotateRight(bits, imm_r & mask, width), width); + } + } } - bits = (bits | 0x80) << 24; - return bits >> (((value & 0xf80) >> 7) - 8); + return 0; } -const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", - "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"}; +/** + * @brief Decode an 8-bit single point number encoded with EncodeImmSingle(). + */ +static float DecodeImmSingle(uint8_t small_float) { + int mantissa = (small_float & 0x0f) + 0x10; + int sign = ((small_float & 0x80) == 0) ? 1 : -1; + float signed_mantissa = static_cast<float>(sign*mantissa); + int exponent = (((small_float >> 4) & 0x7) + 4) & 0x7; + return signed_mantissa*static_cast<float>(1 << exponent)*0.0078125f; +} + +static const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"}; /* * Interpret a format string and build a string no longer than size - * See format key in Assemble.c. + * See format key in assemble_arm64.cc. */ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) { std::string buf; - int i; const char* fmt_end = &fmt[strlen(fmt)]; char tbuf[256]; const char* name; @@ -354,11 +319,24 @@ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned ch DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U); operand = lir->operands[nc-'0']; switch (*fmt++) { - case 'H': - if (operand != 0) { - snprintf(tbuf, arraysize(tbuf), ", %s %d", shift_names[operand & 0x3], operand >> 2); - } else { + case 'e': { + // Omit ", uxtw #0" in strings like "add w0, w1, w3, uxtw #0" and + // ", uxtx #0" in strings like "add x0, x1, x3, uxtx #0" + int omittable = ((IS_WIDE(lir->opcode)) ? 
EncodeExtend(kA64Uxtw, 0) : + EncodeExtend(kA64Uxtw, 0)); + if (LIKELY(operand == omittable)) { + strcpy(tbuf, ""); + } else { + DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf)); + } + } + break; + case 'o': + // Omit ", lsl #0" + if (LIKELY(operand == EncodeShift(kA64Lsl, 0))) { strcpy(tbuf, ""); + } else { + DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf)); } break; case 'B': @@ -387,39 +365,60 @@ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned ch } strcpy(tbuf, name); break; - case 'b': - strcpy(tbuf, "0000"); - for (i = 3; i >= 0; i--) { - tbuf[i] += operand & 1; - operand >>= 1; - } - break; - case 'n': - operand = ~ExpandImmediate(operand); - snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand); - break; - case 'm': - operand = ExpandImmediate(operand); - snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand); - break; case 's': - snprintf(tbuf, arraysize(tbuf), "s%d", RegStorage::RegNum(operand)); + snprintf(tbuf, arraysize(tbuf), "s%d", operand & ARM_FP_REG_MASK); break; case 'S': - snprintf(tbuf, arraysize(tbuf), "d%d", RegStorage::RegNum(operand)); + snprintf(tbuf, arraysize(tbuf), "d%d", operand & ARM_FP_REG_MASK); break; - case 'h': - snprintf(tbuf, arraysize(tbuf), "%04x", operand); + case 'f': + snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_FWIDE(lir->opcode)) ? 'd' : 's', + operand & ARM_FP_REG_MASK); + break; + case 'l': { + bool is_wide = IS_WIDE(lir->opcode); + uint64_t imm = DecodeLogicalImmediate(is_wide, operand); + snprintf(tbuf, arraysize(tbuf), "%" PRId64 " (%#" PRIx64 ")", imm, imm); + } + break; + case 'I': + snprintf(tbuf, arraysize(tbuf), "%f", DecodeImmSingle(operand)); break; case 'M': + if (LIKELY(operand == 0)) + strcpy(tbuf, ""); + else + snprintf(tbuf, arraysize(tbuf), ", lsl #%d", 16*operand); + break; case 'd': snprintf(tbuf, arraysize(tbuf), "%d", operand); break; - case 'C': - operand = RegStorage::RegNum(operand); - DCHECK_LT(operand, static_cast<int>( - sizeof(core_reg_names)/sizeof(core_reg_names[0]))); - snprintf(tbuf, arraysize(tbuf), "%s", core_reg_names[operand]); + case 'w': + if (LIKELY(operand != rwzr)) + snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask); + else + strcpy(tbuf, "wzr"); + break; + case 'W': + if (LIKELY(operand != rwsp)) + snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask); + else + strcpy(tbuf, "wsp"); + break; + case 'x': + if (LIKELY(operand != rxzr)) + snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask); + else + strcpy(tbuf, "xzr"); + break; + case 'X': + if (LIKELY(operand != rsp)) + snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask); + else + strcpy(tbuf, "sp"); + break; + case 'D': + snprintf(tbuf, arraysize(tbuf), "%d", operand*((IS_WIDE(lir->opcode)) ? 8 : 4)); break; case 'E': snprintf(tbuf, arraysize(tbuf), "%d", operand*4); @@ -427,37 +426,51 @@ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned ch case 'F': snprintf(tbuf, arraysize(tbuf), "%d", operand*2); break; + case 'G': + if (LIKELY(operand == 0)) + strcpy(tbuf, ""); + else + strcpy(tbuf, (IS_WIDE(lir->opcode)) ? 
", lsl #3" : ", lsl #2"); + break; case 'c': strcpy(tbuf, cc_names[operand]); break; case 't': snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)", - reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1), + reinterpret_cast<uintptr_t>(base_addr) + lir->offset + (operand << 2), lir->target); break; - case 'u': { - int offset_1 = lir->operands[0]; - int offset_2 = NEXT_LIR(lir)->operands[0]; - uintptr_t target = - (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & - ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) & - 0xfffffffc; - snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void *>(target)); - break; - } - - /* Nothing to print for BLX_2 */ - case 'v': - strcpy(tbuf, "see above"); + case 'r': { + bool is_wide = IS_WIDE(lir->opcode); + if (LIKELY(operand != rwzr && operand != rxzr)) { + snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w', + operand & RegStorage::kRegNumMask); + } else { + strcpy(tbuf, (is_wide) ? "xzr" : "wzr"); + } + } break; - case 'R': - DecodeRegList(lir->opcode, operand, tbuf, arraysize(tbuf)); + case 'R': { + bool is_wide = IS_WIDE(lir->opcode); + if (LIKELY(operand != rwsp || operand != rsp)) { + snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w', + operand & RegStorage::kRegNumMask); + } else { + strcpy(tbuf, (is_wide) ? "sp" : "wsp"); + } + } break; - case 'P': - DecodeFPCSRegList(operand, 16, tbuf, arraysize(tbuf)); + case 'p': + snprintf(tbuf, arraysize(tbuf), ".+%d (addr %#" PRIxPTR ")", 4*operand, + reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4*operand); break; - case 'Q': - DecodeFPCSRegList(operand, 0, tbuf, arraysize(tbuf)); + case 'T': + if (LIKELY(operand == 0)) + strcpy(tbuf, ""); + else if (operand == 1) + strcpy(tbuf, ", lsl #12"); + else + strcpy(tbuf, ", DecodeError3"); break; default: strcpy(tbuf, "DecodeError1"); @@ -519,14 +532,14 @@ void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* pre } bool Arm64Mir2Lir::IsUnconditionalBranch(LIR* lir) { - return ((lir->opcode == kThumbBUncond) || (lir->opcode == kThumb2BUncond)); + return (lir->opcode == kA64B1t); } Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena) { // Sanity check - make sure encoding map lines up. - for (int i = 0; i < kArmLast; i++) { - if (Arm64Mir2Lir::EncodingMap[i].opcode != i) { + for (int i = 0; i < kA64Last; i++) { + if (UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode) != i) { LOG(FATAL) << "Encoding order for " << Arm64Mir2Lir::EncodingMap[i].name << " is wrong: expecting " << i << ", seeing " << static_cast<int>(Arm64Mir2Lir::EncodingMap[i].opcode); @@ -534,8 +547,8 @@ Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAlloca } } -Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, - ArenaAllocator* const arena) { +Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, + ArenaAllocator* const arena) { return new Arm64Mir2Lir(cu, mir_graph, arena); } @@ -584,7 +597,7 @@ void Arm64Mir2Lir::CompilerInitializeRegAlloc() { // TODO: re-enable this when we can safely save r4 over the suspension code path. bool no_suspend = NO_SUSPEND; // || !Runtime::Current()->ExplicitSuspendChecks(); if (no_suspend) { - GetRegInfo(rs_rARM_SUSPEND)->MarkFree(); + GetRegInfo(rs_rA64_SUSPEND)->MarkFree(); } // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods. 
@@ -595,15 +608,7 @@ void Arm64Mir2Lir::CompilerInitializeRegAlloc() { } void Arm64Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) { - DCHECK(rl_keep.wide); - DCHECK(rl_free.wide); - if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) && - (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) && - (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) && - (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) { - // No overlap, free. - FreeTemp(rl_free.reg); - } + LOG(FATAL) << "Unexpected call to FreeRegLocTemps for Arm64"; } /* @@ -613,7 +618,7 @@ void Arm64Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) { */ void Arm64Mir2Lir::AdjustSpillMask() { - core_spill_mask_ |= (1 << rs_rARM_LR.GetRegNum()); + core_spill_mask_ |= (1 << rs_rA64_LR.GetRegNum()); num_core_spills_++; } @@ -649,100 +654,96 @@ void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) { /* Clobber all regs that might be used by an external C call */ void Arm64Mir2Lir::ClobberCallerSave() { - // TODO: rework this - it's gotten even more ugly. - Clobber(rs_r0); - Clobber(rs_r1); - Clobber(rs_r2); - Clobber(rs_r3); - Clobber(rs_r12); - Clobber(rs_r14lr); - Clobber(rs_fr0); - Clobber(rs_fr1); - Clobber(rs_fr2); - Clobber(rs_fr3); - Clobber(rs_fr4); - Clobber(rs_fr5); - Clobber(rs_fr6); - Clobber(rs_fr7); - Clobber(rs_fr8); - Clobber(rs_fr9); - Clobber(rs_fr10); - Clobber(rs_fr11); - Clobber(rs_fr12); - Clobber(rs_fr13); - Clobber(rs_fr14); - Clobber(rs_fr15); - Clobber(rs_dr0); - Clobber(rs_dr1); - Clobber(rs_dr2); - Clobber(rs_dr3); - Clobber(rs_dr4); - Clobber(rs_dr5); - Clobber(rs_dr6); - Clobber(rs_dr7); + // TODO(Arm64): implement this. + UNIMPLEMENTED(WARNING); + + Clobber(rs_x0); + Clobber(rs_x1); + Clobber(rs_x2); + Clobber(rs_x3); + Clobber(rs_x12); + Clobber(rs_x30); + Clobber(rs_f0); + Clobber(rs_f1); + Clobber(rs_f2); + Clobber(rs_f3); + Clobber(rs_f4); + Clobber(rs_f5); + Clobber(rs_f6); + Clobber(rs_f7); + Clobber(rs_f8); + Clobber(rs_f9); + Clobber(rs_f10); + Clobber(rs_f11); + Clobber(rs_f12); + Clobber(rs_f13); + Clobber(rs_f14); + Clobber(rs_f15); } RegLocation Arm64Mir2Lir::GetReturnWideAlt() { RegLocation res = LocCReturnWide(); - res.reg.SetLowReg(rs_r2.GetReg()); - res.reg.SetHighReg(rs_r3.GetReg()); - Clobber(rs_r2); - Clobber(rs_r3); - MarkInUse(rs_r2); - MarkInUse(rs_r3); + res.reg.SetReg(rx2); + res.reg.SetHighReg(rx3); + Clobber(rs_x2); + Clobber(rs_x3); + MarkInUse(rs_x2); + MarkInUse(rs_x3); MarkWide(res.reg); return res; } RegLocation Arm64Mir2Lir::GetReturnAlt() { RegLocation res = LocCReturn(); - res.reg.SetReg(rs_r1.GetReg()); - Clobber(rs_r1); - MarkInUse(rs_r1); + res.reg.SetReg(rx1); + Clobber(rs_x1); + MarkInUse(rs_x1); return res; } /* To be used when explicitly managing register use */ void Arm64Mir2Lir::LockCallTemps() { - LockTemp(rs_r0); - LockTemp(rs_r1); - LockTemp(rs_r2); - LockTemp(rs_r3); + LockTemp(rs_x0); + LockTemp(rs_x1); + LockTemp(rs_x2); + LockTemp(rs_x3); } /* To be used when explicitly managing register use */ void Arm64Mir2Lir::FreeCallTemps() { - FreeTemp(rs_r0); - FreeTemp(rs_r1); - FreeTemp(rs_r2); - FreeTemp(rs_r3); + FreeTemp(rs_x0); + FreeTemp(rs_x1); + FreeTemp(rs_x2); + FreeTemp(rs_x3); } -RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<4> offset) { - LoadWordDisp(rs_rARM_SELF, offset.Int32Value(), rs_rARM_LR); - return rs_rARM_LR; +RegStorage Arm64Mir2Lir::LoadHelper(A64ThreadOffset offset) { + // TODO(Arm64): use LoadWordDisp instead. + // e.g. 
LoadWordDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR); + LoadBaseDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR, k64); + return rs_rA64_LR; } LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() { - RegStorage tmp = rs_r0; - Load32Disp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp); - LIR* load2 = Load32Disp(tmp, 0, tmp); + RegStorage tmp = rs_x0; + LoadWordDisp(rs_rA64_SELF, A64_THREAD_SUSPEND_TRIGGER_OFFSET, tmp); + LIR* load2 = LoadWordDisp(tmp, 0, tmp); return load2; } uint64_t Arm64Mir2Lir::GetTargetInstFlags(int opcode) { DCHECK(!IsPseudoLirOp(opcode)); - return Arm64Mir2Lir::EncodingMap[opcode].flags; + return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].flags; } const char* Arm64Mir2Lir::GetTargetInstName(int opcode) { DCHECK(!IsPseudoLirOp(opcode)); - return Arm64Mir2Lir::EncodingMap[opcode].name; + return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].name; } const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) { DCHECK(!IsPseudoLirOp(opcode)); - return Arm64Mir2Lir::EncodingMap[opcode].fmt; + return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt; } /* @@ -800,4 +801,140 @@ RegStorage Arm64Mir2Lir::AllocPreservedDouble(int s_reg) { return res; } +// TODO(Arm64): reuse info in QuickArgumentVisitor? +static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used, + OpSize* op_size) { + if (loc->fp) { + int n = *num_fpr_used; + if (n < 8) { + *num_fpr_used = n + 1; + RegStorage::RegStorageKind reg_kind; + if (loc->wide) { + *op_size = kDouble; + reg_kind = RegStorage::k64BitSolo; + } else { + *op_size = kSingle; + reg_kind = RegStorage::k32BitSolo; + } + return RegStorage(RegStorage::kValid | reg_kind | RegStorage::kFloatingPoint | n); + } + } else { + int n = *num_gpr_used; + if (n < 7) { + *num_gpr_used = n + 1; + if (loc->wide) { + *op_size = k64; + return RegStorage::Solo64(n); + } else { + *op_size = k32; + return RegStorage::Solo32(n); + } + } + } + + return RegStorage::InvalidReg(); +} + +/* + * If there are any ins passed in registers that have not been promoted + * to a callee-save register, flush them to the frame. Perform initial + * assignment of promoted arguments. + * + * ArgLocs is an array of location records describing the incoming arguments + * with one location record per word of argument. + */ +void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { + int num_gpr_used = 1; + int num_fpr_used = 0; + + /* + * Dummy up a RegLocation for the incoming Method* + * It will attempt to keep kArg0 live (or copy it to home location + * if promoted). + */ + RegLocation rl_src = rl_method; + rl_src.location = kLocPhysReg; + rl_src.reg = TargetReg(kArg0); + rl_src.home = false; + MarkLive(rl_src); + + // TODO(Arm64): compress the Method pointer? 
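// Editor's note (not part of this patch): a sketch of the mapping produced by
// GetArgPhysicalReg() above, with x0 holding the incoming Method* (hence
// num_gpr_used starting at 1). For ins (this, int, long, float, double) the
// hand-out would be roughly:
//   this -> w1, int -> w2, long -> x3, float -> s0, double -> d1
// Core arguments use w1/x1..w6/x6 and FP arguments s0/d0..s7/d7; anything
// beyond that is left in the frame and handled by the flush loop below.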
+ StoreValueWide(rl_method, rl_src); + + // If Method* has been promoted, explicitly flush + if (rl_method.location == kLocPhysReg) { + StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + } + + if (cu_->num_ins == 0) { + return; + } + + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + for (int i = 0; i < cu_->num_ins; i++) { + PromotionMap* v_map = &promotion_map_[start_vreg + i]; + RegLocation* t_loc = &ArgLocs[i]; + OpSize op_size; + RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size); + + if (reg.Valid()) { + if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { + OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); + } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { + OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg); + } else { + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size); + if (reg.Is64Bit()) { + if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) { + LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots"; + } + i += 1; + } + } + } else { + // If arriving in frame & promoted + if (v_map->core_location == kLocPhysReg) { + LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), + RegStorage::Solo32(v_map->core_reg)); + } + if (v_map->fp_location == kLocPhysReg) { + LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg)); + } + } + } +} + +int Arm64Mir2Lir::LoadArgRegs(CallInfo* info, int call_state, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type, bool skip_this) { + int last_arg_reg = TargetReg(kArg3).GetReg(); + int next_reg = TargetReg(kArg1).GetReg(); + int next_arg = 0; + if (skip_this) { + next_reg++; + next_arg++; + } + for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) { + RegLocation rl_arg = info->args[next_arg++]; + rl_arg = UpdateRawLoc(rl_arg); + if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) { + RegStorage r_tmp(RegStorage::k64BitPair, next_reg, next_reg + 1); + LoadValueDirectWideFixed(rl_arg, r_tmp); + next_reg++; + next_arg++; + } else { + if (rl_arg.wide) { + rl_arg = NarrowRegLoc(rl_arg); + rl_arg.is_const = false; + } + LoadValueDirectFixed(rl_arg, RegStorage::Solo32(next_reg)); + } + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + return call_state; +} + } // namespace art diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index 3782bc9ead..e46e201565 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -20,133 +20,236 @@ namespace art { -/* This file contains codegen for the Thumb ISA. */ - -static int32_t EncodeImmSingle(int32_t value) { - int32_t res; - int32_t bit_a = (value & 0x80000000) >> 31; - int32_t not_bit_b = (value & 0x40000000) >> 30; - int32_t bit_b = (value & 0x20000000) >> 29; - int32_t b_smear = (value & 0x3e000000) >> 25; - int32_t slice = (value & 0x01f80000) >> 19; - int32_t zeroes = (value & 0x0007ffff); - if (zeroes != 0) +/* This file contains codegen for the A64 ISA. */ + +static int32_t EncodeImmSingle(uint32_t bits) { + /* + * Valid values will have the form: + * + * aBbb.bbbc.defg.h000.0000.0000.0000.0000 + * + * where B = not(b). In other words, if b == 1, then B == 0 and viceversa. + */ + + // bits[19..0] are cleared. 
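// Editor's note (not part of this patch): a few concrete cases of this
// "FP8" form. 2.0f (0x40000000) encodes to 0x00, 0.5f (0x3f000000) to 0x60
// and 1.5f (0x3fc00000) to 0x78, while 0.1f (0x3dcccccd) fails the checks
// below (low bits set) and returns -1, forcing a literal-pool load.
// DecodeImmSingle() in target_arm64.cc maps these encodings back to floats.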
+ if ((bits & 0x0007ffff) != 0) return -1; - if (bit_b) { - if ((not_bit_b != 0) || (b_smear != 0x1f)) - return -1; - } else { - if ((not_bit_b != 1) || (b_smear != 0x0)) - return -1; - } - res = (bit_a << 7) | (bit_b << 6) | slice; - return res; + + // bits[29..25] are all set or all cleared. + uint32_t b_pattern = (bits >> 16) & 0x3e00; + if (b_pattern != 0 && b_pattern != 0x3e00) + return -1; + + // bit[30] and bit[29] are opposite. + if (((bits ^ (bits << 1)) & 0x40000000) == 0) + return -1; + + // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000 + // bit7: a000.0000 + uint32_t bit7 = ((bits >> 31) & 0x1) << 7; + // bit6: 0b00.0000 + uint32_t bit6 = ((bits >> 29) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint32_t bit5_to_0 = (bits >> 19) & 0x3f; + return (bit7 | bit6 | bit5_to_0); } -/* - * Determine whether value can be encoded as a Thumb2 floating point - * immediate. If not, return -1. If so return encoded 8-bit value. - */ -static int32_t EncodeImmDouble(int64_t value) { - int32_t res; - int32_t bit_a = (value & INT64_C(0x8000000000000000)) >> 63; - int32_t not_bit_b = (value & INT64_C(0x4000000000000000)) >> 62; - int32_t bit_b = (value & INT64_C(0x2000000000000000)) >> 61; - int32_t b_smear = (value & INT64_C(0x3fc0000000000000)) >> 54; - int32_t slice = (value & INT64_C(0x003f000000000000)) >> 48; - uint64_t zeroes = (value & INT64_C(0x0000ffffffffffff)); - if (zeroes != 0ull) +static int32_t EncodeImmDouble(uint64_t bits) { + /* + * Valid values will have the form: + * + * aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + * 0000.0000.0000.0000.0000.0000.0000.0000 + * + * where B = not(b). + */ + + // bits[47..0] are cleared. + if ((bits & UINT64_C(0xffffffffffff)) != 0) return -1; - if (bit_b) { - if ((not_bit_b != 0) || (b_smear != 0xff)) - return -1; - } else { - if ((not_bit_b != 1) || (b_smear != 0x0)) - return -1; - } - res = (bit_a << 7) | (bit_b << 6) | slice; - return res; + + // bits[61..54] are all set or all cleared. + uint32_t b_pattern = (bits >> 48) & 0x3fc0; + if (b_pattern != 0 && b_pattern != 0x3fc0) + return -1; + + // bit[62] and bit[61] are opposite. + if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0) + return -1; + + // bit7: a000.0000 + uint32_t bit7 = ((bits >> 63) & 0x1) << 7; + // bit6: 0b00.0000 + uint32_t bit6 = ((bits >> 61) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint32_t bit5_to_0 = (bits >> 48) & 0x3f; + return (bit7 | bit6 | bit5_to_0); } -LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int value) { +LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int32_t value) { DCHECK(RegStorage::IsSingle(r_dest)); if (value == 0) { - // TODO: we need better info about the target CPU. a vector exclusive or - // would probably be better here if we could rely on its existance. 
- // Load an immediate +2.0 (which encodes to 0) - NewLIR2(kThumb2Vmovs_IMM8, r_dest, 0); - // +0.0 = +2.0 - +2.0 - return NewLIR3(kThumb2Vsubs, r_dest, r_dest, r_dest); + return NewLIR2(kA64Fmov2sw, r_dest, rwzr); } else { - int encoded_imm = EncodeImmSingle(value); + int32_t encoded_imm = EncodeImmSingle((uint32_t)value); if (encoded_imm >= 0) { - return NewLIR2(kThumb2Vmovs_IMM8, r_dest, encoded_imm); + return NewLIR2(kA64Fmov2fI, r_dest, encoded_imm); } } + LIR* data_target = ScanLiteralPool(literal_list_, value, 0); if (data_target == NULL) { data_target = AddWordData(&literal_list_, value); } - LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs, - r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target); + + LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp, + r_dest, 0, 0, 0, 0, data_target); SetMemRefType(load_pc_rel, true, kLiteral); AppendLIR(load_pc_rel); return load_pc_rel; } -static int LeadingZeros(uint32_t val) { - uint32_t alt; - int32_t n; - int32_t count; - - count = 16; - n = 32; - do { - alt = val >> count; - if (alt != 0) { - n = n - count; - val = alt; +LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) { + DCHECK(RegStorage::IsDouble(r_dest)); + if (value == 0) { + return NewLIR2(kA64Fmov2Sx, r_dest, rwzr); + } else { + int32_t encoded_imm = EncodeImmDouble(value); + if (encoded_imm >= 0) { + return NewLIR2(FWIDE(kA64Fmov2fI), r_dest, encoded_imm); } - count >>= 1; - } while (count); - return n - val; + } + + // No short form - load from the literal pool. + int32_t val_lo = Low32Bits(value); + int32_t val_hi = High32Bits(value); + LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); + if (data_target == NULL) { + data_target = AddWideData(&literal_list_, val_lo, val_hi); + } + + DCHECK(RegStorage::IsFloat(r_dest)); + LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp), + r_dest, 0, 0, 0, 0, data_target); + SetMemRefType(load_pc_rel, true, kLiteral); + AppendLIR(load_pc_rel); + return load_pc_rel; } -/* - * Determine whether value can be encoded as a Thumb2 modified - * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form. +static int CountLeadingZeros(bool is_wide, uint64_t value) { + return (is_wide) ? __builtin_clzl(value) : __builtin_clz((uint32_t)value); +} + +static int CountTrailingZeros(bool is_wide, uint64_t value) { + return (is_wide) ? __builtin_ctzl(value) : __builtin_ctz((uint32_t)value); +} + +static int CountSetBits(bool is_wide, uint64_t value) { + return ((is_wide) ? + __builtin_popcountl(value) : __builtin_popcount((uint32_t)value)); +} + +/** + * @brief Try encoding an immediate in the form required by logical instructions. + * + * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value. + * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as + * 32-bit if @p is_wide is false. + * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed. + * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate(). 
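 *
 * Editor's note (not part of this patch): as a worked example, the 64-bit
 * value 0x00ff00ff00ff00ff (an 8-bit pattern with its low eight bits set,
 * repeated across the register) is encodable: this routine returns
 * n=0, imm_r=0, imm_s=0b100111, i.e. 0x027, and
 * DecodeLogicalImmediate(true, 0x027) reconstructs the original value.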
*/ -int Arm64Mir2Lir::ModifiedImmediate(uint32_t value) { - int32_t z_leading; - int32_t z_trailing; - uint32_t b0 = value & 0xff; - - /* Note: case of value==0 must use 0:000:0:0000000 encoding */ - if (value <= 0xFF) - return b0; // 0:000:a:bcdefgh - if (value == ((b0 << 16) | b0)) - return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */ - if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0)) - return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */ - b0 = (value >> 8) & 0xff; - if (value == ((b0 << 24) | (b0 << 8))) - return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */ - /* Can we do it with rotation? */ - z_leading = LeadingZeros(value); - z_trailing = 32 - LeadingZeros(~value & (value - 1)); - /* A run of eight or fewer active bits? */ - if ((z_leading + z_trailing) < 24) - return -1; /* No - bail */ - /* left-justify the constant, discarding msb (known to be 1) */ - value <<= z_leading + 1; - /* Create bcdefgh */ - value >>= 25; - /* Put it all together */ - return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */ +int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) { + unsigned n, imm_s, imm_r; + + // Logical immediates are encoded using parameters n, imm_s and imm_r using + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 + // bits are set. The pattern is rotated right by R, and repeated across a + // 32 or 64-bit value, depending on destination register width. + // + // To test if an arbitary immediate can be encoded using this scheme, an + // iterative algorithm is used. + // + + // 1. If the value has all set or all clear bits, it can't be encoded. + if (value == 0 || value == ~UINT64_C(0) || + (!is_wide && (uint32_t)value == ~UINT32_C(0))) { + return -1; + } + + unsigned lead_zero = CountLeadingZeros(is_wide, value); + unsigned lead_one = CountLeadingZeros(is_wide, ~value); + unsigned trail_zero = CountTrailingZeros(is_wide, value); + unsigned trail_one = CountTrailingZeros(is_wide, ~value); + unsigned set_bits = CountSetBits(is_wide, value); + + // The fixed bits in the immediate s field. + // If width == 64 (X reg), start at 0xFFFFFF80. + // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit + // widths won't be executed. + unsigned width = (is_wide) ? 64 : 32; + int imm_s_fixed = (is_wide) ? -128 : -64; + int imm_s_mask = 0x3f; + + for (;;) { + // 2. If the value is two bits wide, it can be encoded. + if (width == 2) { + n = 0; + imm_s = 0x3C; + imm_r = (value & 3) - 1; + break; + } + + n = (width == 64) ? 1 : 0; + imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask); + if ((lead_zero + set_bits) == width) { + imm_r = 0; + } else { + imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one; + } + + // 3. If the sum of leading zeros, trailing zeros and set bits is + // equal to the bit width of the value, it can be encoded. + if (lead_zero + trail_zero + set_bits == width) { + break; + } + + // 4. If the sum of leading ones, trailing ones and unset bits in the + // value is equal to the bit width of the value, it can be encoded. + if (lead_one + trail_one + (width - set_bits) == width) { + break; + } + + // 5. 
If the most-significant half of the bitwise value is equal to + // the least-significant half, return to step 2 using the + // least-significant half of the value. + uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1; + if ((value & mask) == ((value >> (width >> 1)) & mask)) { + width >>= 1; + set_bits >>= 1; + imm_s_fixed >>= 1; + continue; + } + + // 6. Otherwise, the value can't be encoded. + return -1; + } + + return (n << 12 | imm_r << 6 | imm_s); } bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) { - return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0); + return false; // (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0); } bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) { @@ -162,8 +265,8 @@ bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) { } /* - * Load a immediate using a shortcut if possible; otherwise - * grab from the per-translation literal pool. + * Load a immediate using one single instruction when possible; otherwise + * use a pair of movz and movk instructions. * * No additional register clobbering operation performed. Use this version when * 1) r_dest is freshly returned from AllocTemp or @@ -171,204 +274,163 @@ bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) { */ LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { LIR* res; - int mod_imm; if (r_dest.IsFloat()) { return LoadFPConstantValue(r_dest.GetReg(), value); } - /* See if the value can be constructed cheaply */ - if (r_dest.Low8() && (value >= 0) && (value <= 255)) { - return NewLIR2(kThumbMovImm, r_dest.GetReg(), value); - } - /* Check Modified immediate special cases */ - mod_imm = ModifiedImmediate(value); - if (mod_imm >= 0) { - res = NewLIR2(kThumb2MovI8M, r_dest.GetReg(), mod_imm); - return res; - } - mod_imm = ModifiedImmediate(~value); - if (mod_imm >= 0) { - res = NewLIR2(kThumb2MvnI8M, r_dest.GetReg(), mod_imm); - return res; - } - /* 16-bit immediate? */ - if ((value & 0xffff) == value) { - res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), value); - return res; + // Loading SP/ZR with an immediate is not supported. + DCHECK_NE(r_dest.GetReg(), rwsp); + DCHECK_NE(r_dest.GetReg(), rwzr); + + // Compute how many movk, movz instructions are needed to load the value. + uint16_t high_bits = High16Bits(value); + uint16_t low_bits = Low16Bits(value); + + bool low_fast = ((uint16_t)(low_bits + 1) <= 1); + bool high_fast = ((uint16_t)(high_bits + 1) <= 1); + + if (LIKELY(low_fast || high_fast)) { + // 1 instruction is enough to load the immediate. + if (LIKELY(low_bits == high_bits)) { + // Value is either 0 or -1: we can just use wzr. + ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr; + res = NewLIR2(opcode, r_dest.GetReg(), rwzr); + } else { + uint16_t uniform_bits, useful_bits; + int shift; + + if (LIKELY(high_fast)) { + shift = 0; + uniform_bits = high_bits; + useful_bits = low_bits; + } else { + shift = 1; + uniform_bits = low_bits; + useful_bits = high_bits; + } + + if (UNLIKELY(uniform_bits != 0)) { + res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift); + } else { + res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift); + } + } + } else { + // movk, movz require 2 instructions. Try detecting logical immediates. + int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value); + if (log_imm >= 0) { + res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm); + } else { + // Use 2 instructions. 
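// Editor's note (not part of this patch): e.g. value 0x12345678 (neither
// half-word is 0x0000/0xffff and it is not a logical immediate) takes this
// path and is materialized as:
//   movz w_dest, #0x5678
//   movk w_dest, #0x1234, lsl #16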
+ res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0); + NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1); + } } - /* Do a low/high pair */ - res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), Low16Bits(value)); - NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), High16Bits(value)); + return res; } LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) { - LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched during assembly */); + LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */); res->target = target; return res; } LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) { - // This is kThumb2BCond instead of kThumbBCond for performance reasons. The assembly - // time required for a new pass after kThumbBCond is fixed up to kThumb2BCond is - // substantial. - LIR* branch = NewLIR2(kThumb2BCond, 0 /* offset to be patched */, - ArmConditionEncoding(cc)); + LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc), + 0 /* offset to be patched */); branch->target = target; return branch; } LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { - ArmOpcode opcode = kThumbBkpt; + ArmOpcode opcode = kA64Brk1d; switch (op) { case kOpBlx: - opcode = kThumbBlxR; - break; - case kOpBx: - opcode = kThumbBx; + opcode = kA64Blr1x; break; + // TODO(Arm64): port kThumbBx. + // case kOpBx: + // opcode = kThumbBx; + // break; default: LOG(FATAL) << "Bad opcode " << op; } return NewLIR1(opcode, r_dest_src.GetReg()); } -LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, - int shift) { - bool thumb_form = - ((shift == 0) && r_dest_src1.Low8() && r_src2.Low8()); - ArmOpcode opcode = kThumbBkpt; - switch (op) { - case kOpAdc: - opcode = (thumb_form) ? kThumbAdcRR : kThumb2AdcRRR; - break; - case kOpAnd: - opcode = (thumb_form) ? kThumbAndRR : kThumb2AndRRR; - break; - case kOpBic: - opcode = (thumb_form) ? kThumbBicRR : kThumb2BicRRR; - break; +LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, + int shift, bool is_wide) { + ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0); + ArmOpcode opcode = kA64Brk1d; + + switch (OP_KIND_UNWIDE(op)) { case kOpCmn: - DCHECK_EQ(shift, 0); - opcode = (thumb_form) ? kThumbCmnRR : kThumb2CmnRR; + opcode = kA64Cmn3Rro; break; case kOpCmp: - if (thumb_form) - opcode = kThumbCmpRR; - else if ((shift == 0) && !r_dest_src1.Low8() && !r_src2.Low8()) - opcode = kThumbCmpHH; - else if ((shift == 0) && r_dest_src1.Low8()) - opcode = kThumbCmpLH; - else if (shift == 0) - opcode = kThumbCmpHL; - else - opcode = kThumb2CmpRR; - break; - case kOpXor: - opcode = (thumb_form) ? kThumbEorRR : kThumb2EorRRR; + // TODO(Arm64): check the instruction above: "cmp w0, w1" is rendered as "cmp w0, w1, uxtb". + opcode = kA64Cmp3Rro; break; case kOpMov: - DCHECK_EQ(shift, 0); - if (r_dest_src1.Low8() && r_src2.Low8()) - opcode = kThumbMovRR; - else if (!r_dest_src1.Low8() && !r_src2.Low8()) - opcode = kThumbMovRR_H2H; - else if (r_dest_src1.Low8()) - opcode = kThumbMovRR_H2L; - else - opcode = kThumbMovRR_L2H; - break; - case kOpMul: - DCHECK_EQ(shift, 0); - opcode = (thumb_form) ? kThumbMul : kThumb2MulRRR; + opcode = kA64Mov2rr; break; case kOpMvn: - opcode = (thumb_form) ? kThumbMvn : kThumb2MnvRR; + opcode = kA64Mvn2rr; break; case kOpNeg: - DCHECK_EQ(shift, 0); - opcode = (thumb_form) ? kThumbNeg : kThumb2NegRR; - break; - case kOpOr: - opcode = (thumb_form) ? kThumbOrr : kThumb2OrrRRR; - break; - case kOpSbc: - opcode = (thumb_form) ? 
kThumbSbc : kThumb2SbcRRR; + opcode = kA64Neg3rro; break; case kOpTst: - opcode = (thumb_form) ? kThumbTst : kThumb2TstRR; - break; - case kOpLsl: - DCHECK_EQ(shift, 0); - opcode = (thumb_form) ? kThumbLslRR : kThumb2LslRRR; - break; - case kOpLsr: - DCHECK_EQ(shift, 0); - opcode = (thumb_form) ? kThumbLsrRR : kThumb2LsrRRR; - break; - case kOpAsr: - DCHECK_EQ(shift, 0); - opcode = (thumb_form) ? kThumbAsrRR : kThumb2AsrRRR; - break; - case kOpRor: - DCHECK_EQ(shift, 0); - opcode = (thumb_form) ? kThumbRorRR : kThumb2RorRRR; - break; - case kOpAdd: - opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR; - break; - case kOpSub: - opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR; + opcode = kA64Tst3rro; break; case kOpRev: DCHECK_EQ(shift, 0); - if (!thumb_form) { - // Binary, but rm is encoded twice. - return NewLIR3(kThumb2RevRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg()); - } - opcode = kThumbRev; + // Binary, but rm is encoded twice. + return NewLIR3(kA64Rev2rr | wide, r_dest_src1, r_src2, r_src2); break; case kOpRevsh: - DCHECK_EQ(shift, 0); - if (!thumb_form) { - // Binary, but rm is encoded twice. - return NewLIR3(kThumb2RevshRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg()); - } - opcode = kThumbRevsh; + // Binary, but rm is encoded twice. + return NewLIR3(kA64Rev162rr | wide, r_dest_src1, r_src2, r_src2); break; case kOp2Byte: - DCHECK_EQ(shift, 0); - return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 8); + DCHECK_EQ(shift, ENCODE_NO_SHIFT); + // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)". + // For now we use sbfm directly. + return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 7); case kOp2Short: - DCHECK_EQ(shift, 0); - return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16); + DCHECK_EQ(shift, ENCODE_NO_SHIFT); + // For now we use sbfm rather than its alias, sbfx. + return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 15); case kOp2Char: - DCHECK_EQ(shift, 0); - return NewLIR4(kThumb2Ubfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16); + // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)". + // For now we use ubfm directly. 
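// Editor's note (not part of this patch): with the parameters used here the
// bitfield-move forms reduce to the usual extension aliases:
//   sbfm rd, rn, #0, #7   <=> sxtb rd, rn   (kOp2Byte)
//   sbfm rd, rn, #0, #15  <=> sxth rd, rn   (kOp2Short)
//   ubfm rd, rn, #0, #15  <=> uxth rd, rn   (kOp2Char)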
+ DCHECK_EQ(shift, ENCODE_NO_SHIFT); + return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1, r_src2, 0, 15); default: - LOG(FATAL) << "Bad opcode: " << op; - break; + return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift); } + DCHECK(!IsPseudoLirOp(opcode)); if (EncodingMap[opcode].flags & IS_BINARY_OP) { - return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg()); + DCHECK_EQ(shift, ENCODE_NO_SHIFT); + return NewLIR2(opcode | wide, r_dest_src1, r_src2); } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { - if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) { - return NewLIR3(opcode, r_dest_src1.GetReg(), r_src2.GetReg(), shift); - } else { - return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg()); + ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind; + if (kind == kFmtExtend || kind == kFmtShift) { + DCHECK_EQ(kind == kFmtExtend, IsExtendEncoding(shift)); + return NewLIR3(opcode | wide, r_dest_src1, r_src2, shift); } - } else if (EncodingMap[opcode].flags & IS_QUAD_OP) { - return NewLIR4(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift); - } else { - LOG(FATAL) << "Unexpected encoding operand count"; - return NULL; } + + LOG(FATAL) << "Unexpected encoding operand count"; + return NULL; } LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) { - return OpRegRegShift(op, r_dest_src1, r_src2, 0); + return OpRegRegShift(op, r_dest_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT, + r_dest_src1.Is64Bit()); } LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) { @@ -382,207 +444,162 @@ LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, } LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) { - LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm"; + LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64"; return NULL; } -LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, - RegStorage r_src2, int shift) { - ArmOpcode opcode = kThumbBkpt; - bool thumb_form = (shift == 0) && r_dest.Low8() && r_src1.Low8() && r_src2.Low8(); - switch (op) { +LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1, + int r_src2, int shift, bool is_wide) { + ArmOpcode opcode = kA64Brk1d; + + switch (OP_KIND_UNWIDE(op)) { case kOpAdd: - opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR; + opcode = kA64Add4rrro; break; case kOpSub: - opcode = (thumb_form) ? 
kThumbSubRRR : kThumb2SubRRR; - break; - case kOpRsub: - opcode = kThumb2RsubRRR; + opcode = kA64Sub4rrro; break; + // case kOpRsub: + // opcode = kA64RsubWWW; + // break; case kOpAdc: - opcode = kThumb2AdcRRR; + opcode = kA64Adc3rrr; break; case kOpAnd: - opcode = kThumb2AndRRR; - break; - case kOpBic: - opcode = kThumb2BicRRR; + opcode = kA64And4rrro; break; case kOpXor: - opcode = kThumb2EorRRR; + opcode = kA64Eor4rrro; break; case kOpMul: - DCHECK_EQ(shift, 0); - opcode = kThumb2MulRRR; + opcode = kA64Mul3rrr; break; case kOpDiv: - DCHECK_EQ(shift, 0); - opcode = kThumb2SdivRRR; + opcode = kA64Sdiv3rrr; break; case kOpOr: - opcode = kThumb2OrrRRR; + opcode = kA64Orr4rrro; break; case kOpSbc: - opcode = kThumb2SbcRRR; + opcode = kA64Sbc3rrr; break; case kOpLsl: - DCHECK_EQ(shift, 0); - opcode = kThumb2LslRRR; + opcode = kA64Lsl3rrr; break; case kOpLsr: - DCHECK_EQ(shift, 0); - opcode = kThumb2LsrRRR; + opcode = kA64Lsr3rrr; break; case kOpAsr: - DCHECK_EQ(shift, 0); - opcode = kThumb2AsrRRR; + opcode = kA64Asr3rrr; break; case kOpRor: - DCHECK_EQ(shift, 0); - opcode = kThumb2RorRRR; + opcode = kA64Ror3rrr; break; default: LOG(FATAL) << "Bad opcode: " << op; break; } - DCHECK(!IsPseudoLirOp(opcode)); + + // The instructions above belong to two kinds: + // - 4-operands instructions, where the last operand is a shift/extend immediate, + // - 3-operands instructions with no shift/extend. + ArmOpcode widened_opcode = (is_wide) ? WIDE(opcode) : opcode; if (EncodingMap[opcode].flags & IS_QUAD_OP) { - return NewLIR4(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift); + DCHECK_EQ(shift, ENCODE_NO_SHIFT); + return NewLIR4(widened_opcode, r_dest, r_src1, r_src2, shift); } else { DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP); - return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg()); + DCHECK_EQ(shift, ENCODE_NO_SHIFT); + return NewLIR3(widened_opcode, r_dest, r_src1, r_src2); } } LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) { - return OpRegRegRegShift(op, r_dest, r_src1, r_src2, 0); + return OpRegRegRegShift(op, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT); } LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) { LIR* res; bool neg = (value < 0); - int32_t abs_value = (neg) ? -value : value; - ArmOpcode opcode = kThumbBkpt; - ArmOpcode alt_opcode = kThumbBkpt; - bool all_low_regs = r_dest.Low8() && r_src1.Low8(); - int32_t mod_imm = ModifiedImmediate(value); + int64_t abs_value = (neg) ? -value : value; + ArmOpcode opcode = kA64Brk1d; + ArmOpcode alt_opcode = kA64Brk1d; + int32_t log_imm = -1; + bool is_wide = OP_KIND_IS_WIDE(op); + ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0); - switch (op) { - case kOpLsl: - if (all_low_regs) - return NewLIR3(kThumbLslRRI5, r_dest.GetReg(), r_src1.GetReg(), value); - else - return NewLIR3(kThumb2LslRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + switch (OP_KIND_UNWIDE(op)) { + case kOpLsl: { + // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)" + // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 32), #(31-imm)". + // For now, we just use ubfm directly. + int max_value = (is_wide) ? 
64 : 32; + return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(), + (-value) & (max_value - 1), max_value - value); + } case kOpLsr: - if (all_low_regs) - return NewLIR3(kThumbLsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value); - else - return NewLIR3(kThumb2LsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value); case kOpAsr: - if (all_low_regs) - return NewLIR3(kThumbAsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value); - else - return NewLIR3(kThumb2AsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value); case kOpRor: - return NewLIR3(kThumb2RorRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm". + // For now, we just use extr directly. + return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(), + value); case kOpAdd: - if (r_dest.Low8() && (r_src1 == rs_r13sp) && (value <= 1020) && ((value & 0x3) == 0)) { - return NewLIR3(kThumbAddSpRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2); - } else if (r_dest.Low8() && (r_src1 == rs_r15pc) && - (value <= 1020) && ((value & 0x3) == 0)) { - return NewLIR3(kThumbAddPcRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2); - } + neg = !neg; // Note: intentional fallthrough case kOpSub: - if (all_low_regs && ((abs_value & 0x7) == abs_value)) { - if (op == kOpAdd) - opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3; - else - opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3; - return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value); - } - if (mod_imm < 0) { - mod_imm = ModifiedImmediate(-value); - if (mod_imm >= 0) { - op = (op == kOpAdd) ? kOpSub : kOpAdd; - } - } - if (mod_imm < 0 && (abs_value & 0x3ff) == abs_value) { - // This is deliberately used only if modified immediate encoding is inadequate since - // we sometimes actually use the flags for small values but not necessarily low regs. - if (op == kOpAdd) - opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12; - else - opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12; - return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value); - } - if (op == kOpSub) { - opcode = kThumb2SubRRI8M; - alt_opcode = kThumb2SubRRR; + // Add and sub below read/write sp rather than xzr. + if (abs_value < 0x1000) { + opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT; + return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0); + } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) { + opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT; + return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1); } else { - opcode = kThumb2AddRRI8M; - alt_opcode = kThumb2AddRRR; + log_imm = -1; + alt_opcode = (neg) ? 
kA64Add4rrro : kA64Sub4rrro; } break; - case kOpRsub: - opcode = kThumb2RsubRRI8M; - alt_opcode = kThumb2RsubRRR; - break; + // case kOpRsub: + // opcode = kThumb2RsubRRI8M; + // alt_opcode = kThumb2RsubRRR; + // break; case kOpAdc: - opcode = kThumb2AdcRRI8M; - alt_opcode = kThumb2AdcRRR; + log_imm = -1; + alt_opcode = kA64Adc3rrr; break; case kOpSbc: - opcode = kThumb2SbcRRI8M; - alt_opcode = kThumb2SbcRRR; + log_imm = -1; + alt_opcode = kA64Sbc3rrr; break; case kOpOr: - opcode = kThumb2OrrRRI8M; - alt_opcode = kThumb2OrrRRR; + log_imm = EncodeLogicalImmediate(is_wide, value); + opcode = kA64Orr3Rrl; + alt_opcode = kA64Orr4rrro; break; case kOpAnd: - if (mod_imm < 0) { - mod_imm = ModifiedImmediate(~value); - if (mod_imm >= 0) { - return NewLIR3(kThumb2BicRRI8M, r_dest.GetReg(), r_src1.GetReg(), mod_imm); - } - } - opcode = kThumb2AndRRI8M; - alt_opcode = kThumb2AndRRR; + log_imm = EncodeLogicalImmediate(is_wide, value); + opcode = kA64And3Rrl; + alt_opcode = kA64And4rrro; break; case kOpXor: - opcode = kThumb2EorRRI8M; - alt_opcode = kThumb2EorRRR; + log_imm = EncodeLogicalImmediate(is_wide, value); + opcode = kA64Eor3Rrl; + alt_opcode = kA64Eor4rrro; break; case kOpMul: // TUNING: power of 2, shift & add - mod_imm = -1; - alt_opcode = kThumb2MulRRR; + log_imm = -1; + alt_opcode = kA64Mul3rrr; break; - case kOpCmp: { - LIR* res; - if (mod_imm >= 0) { - res = NewLIR2(kThumb2CmpRI8M, r_src1.GetReg(), mod_imm); - } else { - mod_imm = ModifiedImmediate(-value); - if (mod_imm >= 0) { - res = NewLIR2(kThumb2CmnRI8M, r_src1.GetReg(), mod_imm); - } else { - RegStorage r_tmp = AllocTemp(); - res = LoadConstant(r_tmp, value); - OpRegReg(kOpCmp, r_src1, r_tmp); - FreeTemp(r_tmp); - } - } - return res; - } default: LOG(FATAL) << "Bad opcode: " << op; } - if (mod_imm >= 0) { - return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), mod_imm); + if (log_imm >= 0) { + return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm); } else { RegStorage r_scratch = AllocTemp(); LoadConstant(r_scratch, value); @@ -595,226 +612,209 @@ LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, } } -/* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */ LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { + return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value), /*is_wide*/false); +} + +LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide) { + ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0); + ArmOpcode opcode = kA64Brk1d; + ArmOpcode neg_opcode = kA64Brk1d; + bool shift; bool neg = (value < 0); - int32_t abs_value = (neg) ? -value : value; - bool short_form = (((abs_value & 0xff) == abs_value) && r_dest_src1.Low8()); - ArmOpcode opcode = kThumbBkpt; - switch (op) { + uint64_t abs_value = (neg) ? -value : value; + + if (LIKELY(abs_value < 0x1000)) { + // abs_value is a 12-bit immediate. + shift = false; + } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) { + // abs_value is a shifted 12-bit immediate. + shift = true; + abs_value >>= 12; + } else { + RegStorage r_tmp = AllocTemp(); + LIR* res = LoadConstant(r_tmp, value); + OpRegReg(op, r_dest_src1, r_tmp); + FreeTemp(r_tmp); + return res; + } + + switch (OP_KIND_UNWIDE(op)) { case kOpAdd: - if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */ - DCHECK_EQ((value & 0x3), 0); - return NewLIR1(kThumbAddSpI7, value >> 2); - } else if (short_form) { - opcode = (neg) ? 
kThumbSubRI8 : kThumbAddRI8; - } + neg_opcode = kA64Sub4RRdT; + opcode = kA64Add4RRdT; break; case kOpSub: - if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */ - DCHECK_EQ((value & 0x3), 0); - return NewLIR1(kThumbSubSpI7, value >> 2); - } else if (short_form) { - opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8; - } + neg_opcode = kA64Add4RRdT; + opcode = kA64Sub4RRdT; break; case kOpCmp: - if (!neg && short_form) { - opcode = kThumbCmpRI8; - } else { - short_form = false; - } + neg_opcode = kA64Cmn3RdT; + opcode = kA64Cmp3RdT; break; default: - /* Punt to OpRegRegImm - if bad case catch it there */ - short_form = false; + LOG(FATAL) << "Bad op-kind in OpRegImm: " << op; break; } - if (short_form) { - return NewLIR2(opcode, r_dest_src1.GetReg(), abs_value); - } else { - return OpRegRegImm(op, r_dest_src1, r_dest_src1, value); - } + + if (UNLIKELY(neg)) + opcode = neg_opcode; + + if (EncodingMap[opcode].flags & IS_QUAD_OP) + return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value, + (shift) ? 1 : 0); + else + return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0); } LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { - LIR* res = NULL; - int32_t val_lo = Low32Bits(value); - int32_t val_hi = High32Bits(value); if (r_dest.IsFloat()) { - DCHECK(!r_dest.IsPair()); - if ((val_lo == 0) && (val_hi == 0)) { - // TODO: we need better info about the target CPU. a vector exclusive or - // would probably be better here if we could rely on its existance. - // Load an immediate +2.0 (which encodes to 0) - NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), 0); - // +0.0 = +2.0 - +2.0 - res = NewLIR3(kThumb2Vsubd, r_dest.GetReg(), r_dest.GetReg(), r_dest.GetReg()); - } else { - int encoded_imm = EncodeImmDouble(value); - if (encoded_imm >= 0) { - res = NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), encoded_imm); - } - } + return LoadFPConstantValueWide(r_dest.GetReg(), value); } else { - // NOTE: Arm32 assumption here. - DCHECK(r_dest.IsPair()); - if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) { - res = LoadConstantNoClobber(r_dest.GetLow(), val_lo); - LoadConstantNoClobber(r_dest.GetHigh(), val_hi); - } - } - if (res == NULL) { + // TODO(Arm64): check whether we can load the immediate with a short form. + // e.g. via movz, movk or via logical immediate. + // No short form - load from the literal pool. 
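(Aside on the TODO above, which leaves the short-form check open: A64 can build many 64-bit constants from one movz plus a movk per additional non-zero halfword, so a literal-pool load is only needed when that sequence gets long. The sketch below is purely illustrative — the helper names and the two-instruction policy are hypothetical and not part of this patch.)

    #include <cstdint>

    // Counts the non-zero 16-bit halfwords of |value|. One movz plus one movk
    // per extra non-zero halfword can materialize the constant, so the literal
    // pool is only worthwhile when that sequence would be too long.
    static int CountNonZeroHalfwords(uint64_t value) {
      int count = 0;
      for (int shift = 0; shift < 64; shift += 16) {
        if (((value >> shift) & 0xffff) != 0) {
          count++;
        }
      }
      return count;
    }

    // Example policy (hypothetical): prefer movz/movk when at most two
    // instructions suffice; otherwise fall back to the literal pool.
    static bool ShortFormLikelyCheaper(uint64_t value) {
      return CountNonZeroHalfwords(value) <= 2;
    }
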
+ int32_t val_lo = Low32Bits(value); + int32_t val_hi = High32Bits(value); LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); if (data_target == NULL) { data_target = AddWideData(&literal_list_, val_lo, val_hi); } - if (r_dest.IsFloat()) { - res = RawLIR(current_dalvik_offset_, kThumb2Vldrd, - r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target); - } else { - DCHECK(r_dest.IsPair()); - res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8, - r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target); - } + + LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), + r_dest.GetReg(), 0, 0, 0, 0, data_target); SetMemRefType(res, true, kLiteral); AppendLIR(res); + return res; } - return res; } -int Arm64Mir2Lir::EncodeShift(int code, int amount) { - return ((amount & 0x1f) << 2) | code; +int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) { + return ((shift_type & 0x3) << 7) | (amount & 0x1f); +} + +int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) { + return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7); +} + +bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) { + return ((1 << 6) & encoded_value) != 0; } LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, - int scale, OpSize size) { - bool all_low_regs = r_base.Low8() && r_index.Low8() && r_dest.Low8(); + int scale, OpSize size) { LIR* load; - ArmOpcode opcode = kThumbBkpt; - bool thumb_form = (all_low_regs && (scale == 0)); - RegStorage reg_ptr; + ArmOpcode opcode = kA64Brk1d; + ArmOpcode wide = kA64NotWide; + + DCHECK(scale == 0 || scale == 1); if (r_dest.IsFloat()) { - if (r_dest.IsSingle()) { - DCHECK((size == k32) || (size == kSingle) || (size == kReference)); - opcode = kThumb2Vldrs; - size = kSingle; - } else { - DCHECK(r_dest.IsDouble()); - DCHECK((size == k64) || (size == kDouble)); - opcode = kThumb2Vldrd; - size = kDouble; - } - } else { - if (size == kSingle) - size = k32; + bool is_double = r_dest.IsDouble(); + bool is_single = !is_double; + DCHECK_EQ(is_single, r_dest.IsSingle()); + + // If r_dest is a single, then size must be either k32 or kSingle. + // If r_dest is a double, then size must be either k64 or kDouble. + DCHECK(!is_single || size == k32 || size == kSingle); + DCHECK(!is_double || size == k64 || size == kDouble); + return NewLIR4((is_double) ? FWIDE(kA64Ldr4fXxG) : kA64Ldr4fXxG, + r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); } switch (size) { - case kDouble: // fall-through - // Intentional fall-though. + case kDouble: + case kWord: + case k64: + wide = kA64Wide; + // Intentional fall-trough. case kSingle: - reg_ptr = AllocTemp(); - if (scale) { - NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(), - EncodeShift(kArmLsl, scale)); - } else { - OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index); - } - load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0); - FreeTemp(reg_ptr); - return load; case k32: - // Intentional fall-though. case kReference: - opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR; + opcode = kA64Ldr4rXxG; break; case kUnsignedHalf: - opcode = (thumb_form) ? kThumbLdrhRRR : kThumb2LdrhRRR; + opcode = kA64Ldrh4wXxd; break; case kSignedHalf: - opcode = (thumb_form) ? kThumbLdrshRRR : kThumb2LdrshRRR; + opcode = kA64Ldrsh4rXxd; break; case kUnsignedByte: - opcode = (thumb_form) ? kThumbLdrbRRR : kThumb2LdrbRRR; + opcode = kA64Ldrb3wXx; break; case kSignedByte: - opcode = (thumb_form) ? 
kThumbLdrsbRRR : kThumb2LdrsbRRR; + opcode = kA64Ldrsb3rXx; break; default: LOG(FATAL) << "Bad size: " << size; } - if (thumb_form) - load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg()); - else - load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + + if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) { + // Tertiary ops (e.g. ldrb, ldrsb) do not support scale. + DCHECK_EQ(scale, 0); + load = NewLIR3(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg()); + } else { + DCHECK(scale == 0 || scale == ((wide == kA64Wide) ? 3 : 2)); + load = NewLIR4(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), + (scale != 0) ? 1 : 0); + } return load; } LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, - int scale, OpSize size) { - bool all_low_regs = r_base.Low8() && r_index.Low8() && r_src.Low8(); - LIR* store = NULL; - ArmOpcode opcode = kThumbBkpt; - bool thumb_form = (all_low_regs && (scale == 0)); - RegStorage reg_ptr; + int scale, OpSize size) { + LIR* store; + ArmOpcode opcode = kA64Brk1d; + ArmOpcode wide = kA64NotWide; + + DCHECK(scale == 0 || scale == 1); if (r_src.IsFloat()) { - if (r_src.IsSingle()) { - DCHECK((size == k32) || (size == kSingle) || (size == kReference)); - opcode = kThumb2Vstrs; - size = kSingle; - } else { - DCHECK(r_src.IsDouble()); - DCHECK((size == k64) || (size == kDouble)); - DCHECK_EQ((r_src.GetReg() & 0x1), 0); - opcode = kThumb2Vstrd; - size = kDouble; - } - } else { - if (size == kSingle) - size = k32; + bool is_double = r_src.IsDouble(); + bool is_single = !is_double; + DCHECK_EQ(is_single, r_src.IsSingle()); + + // If r_src is a single, then size must be either k32 or kSingle. + // If r_src is a double, then size must be either k64 or kDouble. + DCHECK(!is_single || size == k32 || size == kSingle); + DCHECK(!is_double || size == k64 || size == kDouble); + return NewLIR4((is_double) ? FWIDE(kA64Str4fXxG) : kA64Str4fXxG, + r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); } switch (size) { - case kDouble: // fall-through - // Intentional fall-though. - case kSingle: - reg_ptr = AllocTemp(); - if (scale) { - NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(), - EncodeShift(kArmLsl, scale)); - } else { - OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index); - } - store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0); - FreeTemp(reg_ptr); - return store; - case k32: - // Intentional fall-though. + case kDouble: // Intentional fall-trough. + case kWord: // Intentional fall-trough. + case k64: + opcode = kA64Str4rXxG; + wide = kA64Wide; + break; + case kSingle: // Intentional fall-trough. + case k32: // Intentional fall-trough. case kReference: - opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR; + opcode = kA64Str4rXxG; break; case kUnsignedHalf: - // Intentional fall-though. case kSignedHalf: - opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR; + opcode = kA64Strh4wXxd; break; case kUnsignedByte: - // Intentional fall-though. case kSignedByte: - opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR; + opcode = kA64Strb3wXx; break; default: LOG(FATAL) << "Bad size: " << size; } - if (thumb_form) - store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg()); - else + + if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) { + // Tertiary ops (e.g. strb) do not support scale. 
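(Aside on the scale operand used above, sketched from the constraints already expressed by the patch's DCHECKs rather than anything it adds: A64 register-offset loads and stores only allow the index to be shifted left by log2 of the access size — e.g. ldr w0, [x1, x2, lsl #2] or ldr x0, [x1, x2, lsl #3] — which is why the quaternary forms collapse scale to a 0/1 flag and the byte forms accept no scale at all. The helper name below is hypothetical.)

    #include <cassert>

    // Returns the only legal LSL amount for a scaled register-offset access
    // of the given size in bytes: log2(size). A scale of 0 (no shift) is
    // always legal as well.
    static int ExpectedScaleForAccessSize(int size_in_bytes) {
      assert(size_in_bytes == 1 || size_in_bytes == 2 ||
             size_in_bytes == 4 || size_in_bytes == 8);
      int shift = 0;
      while ((1 << shift) < size_in_bytes) {
        shift++;
      }
      return shift;  // 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3.
    }
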
+ DCHECK_EQ(scale, 0); + store = NewLIR3(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg()); + } else { store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + } return store; } @@ -827,311 +827,245 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size) { LIR* load = NULL; - ArmOpcode opcode = kThumbBkpt; + ArmOpcode opcode = kA64Brk1d; bool short_form = false; - bool thumb2Form = (displacement < 4092 && displacement >= 0); - bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8(); int encoded_disp = displacement; - bool already_generated = false; - bool null_pointer_safepoint = false; switch (size) { - case kDouble: - // Intentional fall-though. + case kDouble: // Intentional fall-through. + case kWord: // Intentional fall-through. case k64: + DCHECK_EQ(encoded_disp & 0x3, 0); if (r_dest.IsFloat()) { - DCHECK(!r_dest.IsPair()); - opcode = kThumb2Vldrd; - if (displacement <= 1020) { + // Currently double values may be misaligned. + if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) { + // Can use scaled load. + opcode = FWIDE(kA64Ldr3fXD); + encoded_disp >>= 3; + short_form = true; + } else if (IS_SIGNED_IMM9(displacement)) { + // Can use unscaled load. + opcode = FWIDE(kA64Ldur3fXd); short_form = true; - encoded_disp >>= 2; - } - } else { - if (displacement <= 1020) { - load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(), - displacement >> 2); } else { - load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32); - null_pointer_safepoint = true; - LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32); + short_form = false; } - already_generated = true; + } else { + // Currently long values may be misaligned. + if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) { + // Can use scaled store. + opcode = FWIDE(kA64Ldr3rXD); + encoded_disp >>= 3; + short_form = true; + } else if (IS_SIGNED_IMM9(displacement)) { + // Can use unscaled store. + opcode = FWIDE(kA64Ldur3rXd); + short_form = true; + } // else: use long sequence (short_form = false). } break; - case kSingle: - // Intentional fall-though. - case k32: - // Intentional fall-though. + case kSingle: // Intentional fall-through. + case k32: // Intentional fall-trough. 
case kReference: if (r_dest.IsFloat()) { - opcode = kThumb2Vldrs; + opcode = kA64Ldr3fXD; if (displacement <= 1020) { short_form = true; encoded_disp >>= 2; } break; } - if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) && - (displacement >= 0)) { - short_form = true; - encoded_disp >>= 2; - opcode = kThumbLdrPcRel; - } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) && - (displacement >= 0)) { - short_form = true; - encoded_disp >>= 2; - opcode = kThumbLdrSpRel; - } else if (all_low && displacement < 128 && displacement >= 0) { + if (displacement <= 16380 && displacement >= 0) { DCHECK_EQ((displacement & 0x3), 0); short_form = true; encoded_disp >>= 2; - opcode = kThumbLdrRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2LdrRRI12; + opcode = kA64Ldr3rXD; } break; case kUnsignedHalf: - if (all_low && displacement < 64 && displacement >= 0) { + if (displacement < 64 && displacement >= 0) { DCHECK_EQ((displacement & 0x1), 0); short_form = true; encoded_disp >>= 1; - opcode = kThumbLdrhRRI5; + opcode = kA64Ldrh3wXF; } else if (displacement < 4092 && displacement >= 0) { short_form = true; - opcode = kThumb2LdrhRRI12; + opcode = kA64Ldrh3wXF; } break; case kSignedHalf: - if (thumb2Form) { - short_form = true; - opcode = kThumb2LdrshRRI12; - } + short_form = true; + opcode = kA64Ldrsh3rXF; break; case kUnsignedByte: - if (all_low && displacement < 32 && displacement >= 0) { - short_form = true; - opcode = kThumbLdrbRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2LdrbRRI12; - } + short_form = true; + opcode = kA64Ldrb3wXd; break; case kSignedByte: - if (thumb2Form) { - short_form = true; - opcode = kThumb2LdrsbRRI12; - } + short_form = true; + opcode = kA64Ldrsb3rXd; break; default: LOG(FATAL) << "Bad size: " << size; } - if (!already_generated) { - if (short_form) { - load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp); + if (short_form) { + load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp); + } else { + RegStorage reg_offset = AllocTemp(); + LoadConstant(reg_offset, encoded_disp); + if (r_dest.IsFloat()) { + // No index ops - must use a long sequence. Turn the offset into a direct pointer. + OpRegReg(kOpAdd, reg_offset, r_base); + load = LoadBaseDispBody(reg_offset, 0, r_dest, size); } else { - RegStorage reg_offset = AllocTemp(); - LoadConstant(reg_offset, encoded_disp); - if (r_dest.IsFloat()) { - // No index ops - must use a long sequence. Turn the offset into a direct pointer. - OpRegReg(kOpAdd, reg_offset, r_base); - load = LoadBaseDispBody(reg_offset, 0, r_dest, size); - } else { - load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size); - } - FreeTemp(reg_offset); + load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size); } + FreeTemp(reg_offset); } // TODO: in future may need to differentiate Dalvik accesses w/ spills - if (r_base == rs_rARM_SP) { + if (r_base == rs_rA64_SP) { AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit()); - } else { - // We might need to generate a safepoint if we have two store instructions (wide or double). - if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) { - MarkSafepointPC(load); - } } return load; } LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size) { - // TODO: base this on target. 
- if (size == kWord) { - size = k32; - } return LoadBaseDispBody(r_base, displacement, r_dest, size); } LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, - OpSize size) { + OpSize size) { LIR* store = NULL; - ArmOpcode opcode = kThumbBkpt; + ArmOpcode opcode = kA64Brk1d; bool short_form = false; - bool thumb2Form = (displacement < 4092 && displacement >= 0); - bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8(); int encoded_disp = displacement; - bool already_generated = false; - bool null_pointer_safepoint = false; switch (size) { + case kDouble: // Intentional fall-through. + case kWord: // Intentional fall-through. case k64: - case kDouble: - if (!r_src.IsFloat()) { - if (displacement <= 1020) { - store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(), - displacement >> 2); - } else { - store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32); - null_pointer_safepoint = true; - StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32); - } - already_generated = true; + DCHECK_EQ(encoded_disp & 0x3, 0); + if (r_src.IsFloat()) { + // Currently double values may be misaligned. + if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) { + // Can use scaled store. + opcode = FWIDE(kA64Str3fXD); + encoded_disp >>= 3; + short_form = true; + } else if (IS_SIGNED_IMM9(displacement)) { + // Can use unscaled store. + opcode = FWIDE(kA64Stur3fXd); + short_form = true; + } // else: use long sequence (short_form = false). } else { - DCHECK(!r_src.IsPair()); - opcode = kThumb2Vstrd; - if (displacement <= 1020) { + // Currently long values may be misaligned. + if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) { + // Can use scaled store. + opcode = FWIDE(kA64Str3rXD); + encoded_disp >>= 3; short_form = true; - encoded_disp >>= 2; - } + } else if (IS_SIGNED_IMM9(displacement)) { + // Can use unscaled store. + opcode = FWIDE(kA64Stur3rXd); + short_form = true; + } // else: use long sequence (short_form = false). } break; - case kSingle: - // Intentional fall-through. - case k32: - // Intentional fall-through. + case kSingle: // Intentional fall-through. + case k32: // Intentional fall-trough. 
case kReference: if (r_src.IsFloat()) { DCHECK(r_src.IsSingle()); - opcode = kThumb2Vstrs; + DCHECK_EQ(encoded_disp & 0x3, 0); + opcode = kA64Str3fXD; if (displacement <= 1020) { short_form = true; encoded_disp >>= 2; } break; } - if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) { - short_form = true; - encoded_disp >>= 2; - opcode = kThumbStrSpRel; - } else if (all_low && displacement < 128 && displacement >= 0) { + + if (displacement <= 16380 && displacement >= 0) { DCHECK_EQ((displacement & 0x3), 0); short_form = true; encoded_disp >>= 2; - opcode = kThumbStrRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2StrRRI12; + opcode = kA64Str3rXD; } break; case kUnsignedHalf: case kSignedHalf: - if (all_low && displacement < 64 && displacement >= 0) { - DCHECK_EQ((displacement & 0x1), 0); - short_form = true; - encoded_disp >>= 1; - opcode = kThumbStrhRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2StrhRRI12; - } + DCHECK_EQ((displacement & 0x1), 0); + short_form = true; + encoded_disp >>= 1; + opcode = kA64Strh3wXF; break; case kUnsignedByte: case kSignedByte: - if (all_low && displacement < 32 && displacement >= 0) { - short_form = true; - opcode = kThumbStrbRRI5; - } else if (thumb2Form) { - short_form = true; - opcode = kThumb2StrbRRI12; - } + short_form = true; + opcode = kA64Strb3wXd; break; default: LOG(FATAL) << "Bad size: " << size; } - if (!already_generated) { - if (short_form) { - store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp); + + if (short_form) { + store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp); + } else { + RegStorage r_scratch = AllocTemp(); + LoadConstant(r_scratch, encoded_disp); + if (r_src.IsFloat()) { + // No index ops - must use a long sequence. Turn the offset into a direct pointer. + OpRegReg(kOpAdd, r_scratch, r_base); + store = StoreBaseDispBody(r_scratch, 0, r_src, size); } else { - RegStorage r_scratch = AllocTemp(); - LoadConstant(r_scratch, encoded_disp); - if (r_src.IsFloat()) { - // No index ops - must use a long sequence. Turn the offset into a direct pointer. - OpRegReg(kOpAdd, r_scratch, r_base); - store = StoreBaseDispBody(r_scratch, 0, r_src, size); - } else { - store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size); - } - FreeTemp(r_scratch); + store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size); } + FreeTemp(r_scratch); } // TODO: In future, may need to differentiate Dalvik & spill accesses - if (r_base == rs_rARM_SP) { + if (r_base == rs_rA64_SP) { AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit()); - } else { - // We might need to generate a safepoint if we have two store instructions (wide or double). - if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) { - MarkSafepointPC(store); - } } return store; } LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size) { - // TODO: base this on target. - if (size == kWord) { - size = k32; - } return StoreBaseDispBody(r_base, displacement, r_src, size); } LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) { - int opcode; - DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble()); - if (r_dest.IsDouble()) { - opcode = kThumb2Vmovd; - } else { - if (r_dest.IsSingle()) { - opcode = r_src.IsSingle() ? 
kThumb2Vmovs : kThumb2Fmsr; - } else { - DCHECK(r_src.IsSingle()); - opcode = kThumb2Fmrs; - } - } - LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg()); - if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { - res->flags.is_nop = true; - } - return res; + LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64"; + return NULL; } -LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) { - LOG(FATAL) << "Unexpected use of OpThreadMem for Arm"; +LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, A64ThreadOffset thread_offset) { + LOG(FATAL) << "Unexpected use of OpThreadMem for Arm64"; return NULL; } LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) { - LOG(FATAL) << "Unexpected use of OpMem for Arm"; + LOG(FATAL) << "Unexpected use of OpMem for Arm64"; return NULL; } LIR* Arm64Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, - int displacement, RegStorage r_src, OpSize size) { - LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm"; + int displacement, RegStorage r_src, OpSize size) { + LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm64"; return NULL; } LIR* Arm64Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) { - LOG(FATAL) << "Unexpected use of OpRegMem for Arm"; + LOG(FATAL) << "Unexpected use of OpRegMem for Arm64"; return NULL; } LIR* Arm64Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_dest, OpSize size) { - LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm"; + LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm64"; return NULL; } diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index fbf8a0cc5b..784dfaf96a 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -497,6 +497,7 @@ void Mir2Lir::InstallSwitchTables() { case kX86_64: bx_offset = 0; break; + case kArm64: case kMips: bx_offset = tab_rec->anchor->offset; break; diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index d51f2e0d32..d321b0063d 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -346,7 +346,7 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset<4> /* * If there are any ins passed in registers that have not been promoted - * to a callee-save register, flush them to the frame. Perform intial + * to a callee-save register, flush them to the frame. Perform initial * assignment of promoted arguments. * * ArgLocs is an array of location records describing the incoming arguments diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 4891d8c830..4b1de4b21e 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -91,6 +91,7 @@ typedef uint32_t CodeOffset; // Native code offset in bytes. // Common combo register usage patterns. #define REG_DEF01 (REG_DEF0 | REG_DEF1) +#define REG_DEF012 (REG_DEF0 | REG_DEF1 | REG_DEF2) #define REG_DEF01_USE2 (REG_DEF0 | REG_DEF1 | REG_USE2) #define REG_DEF0_USE01 (REG_DEF0 | REG_USE01) #define REG_DEF0_USE0 (REG_DEF0 | REG_USE0) @@ -167,6 +168,8 @@ struct LIR { // Target-specific initialization. 
 Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
+Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                            ArenaAllocator* const arena);
 Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                            ArenaAllocator* const arena);
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
@@ -783,7 +786,7 @@ class Mir2Lir : public Backend {
                                bool safepoint_pc);
     void GenInvoke(CallInfo* info);
     void GenInvokeNoInline(CallInfo* info);
-    void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+    virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
     int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
                              NextCallInsn next_call_insn,
                              const MethodReference& target_method,
@@ -830,7 +833,7 @@ class Mir2Lir : public Backend {
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
-    int LoadArgRegs(CallInfo* info, int call_state,
+    virtual int LoadArgRegs(CallInfo* info, int call_state,
                     NextCallInsn next_call_insn,
                     const MethodReference& target_method,
                     uint32_t vtable_idx,