author | Bill Buzbee <buzbee@android.com> | 2014-05-06 23:46:11 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2014-05-06 23:46:11 +0000 |
commit | 7189fee4268c70d7ed0151e988ff7c7cd85f2a30 (patch) | |
tree | 88c353fd9752d7deb4afb914e5444197fa85e442 | |
parent | 68f8e40c90b323346c15d3f54f5884ef7e7af8c5 (diff) | |
parent | 43ec8737d8356dbff0a90bee521fb0e73438da47 (diff) | |
Merge "AArch64: Added arm64 quick backend as an arm clone."
-rw-r--r-- | compiler/dex/quick/arm64/arm64_lir.h | 590 |
-rw-r--r-- | compiler/dex/quick/arm64/assemble_arm64.cc | 1682 |
-rw-r--r-- | compiler/dex/quick/arm64/call_arm64.cc | 477 |
-rw-r--r-- | compiler/dex/quick/arm64/codegen_arm64.h | 212 |
-rw-r--r-- | compiler/dex/quick/arm64/create.sh | 19 |
-rw-r--r-- | compiler/dex/quick/arm64/fp_arm64.cc | 358 |
-rw-r--r-- | compiler/dex/quick/arm64/int_arm64.cc | 1460 |
-rw-r--r-- | compiler/dex/quick/arm64/target_arm64.cc | 803 |
-rw-r--r-- | compiler/dex/quick/arm64/utility_arm64.cc | 1149 |
9 files changed, 6750 insertions(+), 0 deletions(-)
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
new file mode 100644
index 000000000..c6d629581
--- /dev/null
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
+#define ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
+
+#include "dex/compiler_internals.h"
+
+namespace art {
+
+/*
+ * Runtime register usage conventions.
+ *
+ * r0-r3: Argument registers in both Dalvik and C/C++ conventions.
+ *        However, for Dalvik->Dalvik calls we'll pass the target's Method*
+ *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
+ *        registers.
+ * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
+ * r4   : (rARM_SUSPEND) is reserved (suspend check/debugger assist)
+ * r5   : Callee save (promotion target)
+ * r6   : Callee save (promotion target)
+ * r7   : Callee save (promotion target)
+ * r8   : Callee save (promotion target)
+ * r9   : (rARM_SELF) is reserved (pointer to thread-local storage)
+ * r10  : Callee save (promotion target)
+ * r11  : Callee save (promotion target)
+ * r12  : Scratch, may be trashed by linkage stubs
+ * r13  : (sp) is reserved
+ * r14  : (lr) is reserved
+ * r15  : (pc) is reserved
+ *
+ * 5 core temps that codegen can use (r0, r1, r2, r3, r12)
+ * 7 core registers that can be used for promotion
+ *
+ * Floating pointer registers
+ *   s0-s31
+ *   d0-d15, where d0={s0,s1}, d1={s2,s3}, ... , d15={s30,s31}
+ *
+ *   s16-s31 (d8-d15) preserved across C calls
+ *   s0-s15 (d0-d7) trashed across C calls
+ *
+ *   s0-s15/d0-d7 used as codegen temp/scratch
+ *   s16-s31/d8-d31 can be used for promotion.
+ *
+ * Calling convention
+ *   o On a call to a Dalvik method, pass target's Method* in r0
+ *   o r1-r3 will be used for up to the first 3 words of arguments
+ *   o Arguments past the first 3 words will be placed in appropriate
+ *     out slots by the caller.
+ *   o If a 64-bit argument would span the register/memory argument
+ *     boundary, it will instead be fully passed in the frame.
+ *   o Maintain a 16-byte stack alignment
+ *
+ * Stack frame diagram (stack grows down, higher addresses at top):
+ *
+ * +------------------------+
+ * | IN[ins-1]              |  {Note: resides in caller's frame}
+ * |       .                |
+ * | IN[0]                  |
+ * | caller's Method*       |
+ * +========================+  {Note: start of callee's frame}
+ * | spill region           |  {variable sized - will include lr if non-leaf.}
+ * +------------------------+
+ * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long]
+ * +------------------------+
+ * | V[locals-1]            |
+ * | V[locals-2]            |
+ * |      .                 |
+ * |      .                 |
+ * | V[1]                   |
+ * | V[0]                   |
+ * +------------------------+
+ * |  0 to 3 words padding  |
+ * +------------------------+
+ * | OUT[outs-1]            |
+ * | OUT[outs-2]            |
+ * |       .                |
+ * | OUT[0]                 |
+ * | cur_method*            | <<== sp w/ 16-byte alignment
+ * +========================+
+ */
+
+// First FP callee save.
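The layout comment above fully determines how large a callee's frame must be: the spill region, the Dalvik virtual registers V[0..locals-1], zero to three words of padding, the OUT[] argument slots, and the cur_method* slot, all rounded so that sp keeps its 16-byte alignment. As a rough illustration of that arithmetic (a minimal sketch, not ART's frame-allocation code; `ComputeFrameSizeInBytes` and its parameters are invented for this example):

```cpp
// Illustrative sketch only, assuming 32-bit stack words and the 16-byte
// alignment rule documented above. Not part of the ART sources.
#include <cstddef>

namespace {

constexpr std::size_t kWordSize = 4;         // 32-bit words in this ARM-style layout.
constexpr std::size_t kStackAlignment = 16;  // "Maintain a 16-byte stack alignment".

constexpr std::size_t RoundUp(std::size_t x, std::size_t n) {
  return (x + n - 1) & ~(n - 1);
}

// spill region + V[0..num_locals-1] + OUT[0..num_outs-1] + cur_method*,
// padded to alignment; the "0 to 3 words padding" row is whatever RoundUp adds.
constexpr std::size_t ComputeFrameSizeInBytes(std::size_t num_spills,
                                              std::size_t num_locals,
                                              std::size_t num_outs) {
  return RoundUp((num_spills + num_locals + num_outs + 1) * kWordSize, kStackAlignment);
}

static_assert(ComputeFrameSizeInBytes(2, 5, 3) == 48, "11 words round up to 48 bytes");

}  // namespace
```

The register definitions that this comment introduces continue in the diff below.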
+#define ARM_FP_CALLEE_SAVE_BASE 16 + +enum ArmResourceEncodingPos { + kArmGPReg0 = 0, + kArmRegSP = 13, + kArmRegLR = 14, + kArmRegPC = 15, + kArmFPReg0 = 16, + kArmFPReg16 = 32, + kArmRegEnd = 48, +}; + +#define ENCODE_ARM_REG_LIST(N) (static_cast<uint64_t>(N)) +#define ENCODE_ARM_REG_SP (1ULL << kArmRegSP) +#define ENCODE_ARM_REG_LR (1ULL << kArmRegLR) +#define ENCODE_ARM_REG_PC (1ULL << kArmRegPC) +#define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16) + +enum ArmNativeRegisterPool { + r0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0, + r1 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1, + r2 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2, + r3 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3, + rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4, + r5 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5, + r6 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6, + r7 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7, + r8 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8, + rARM_SELF = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9, + r10 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10, + r11 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11, + r12 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12, + r13sp = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13, + rARM_SP = r13sp, + r14lr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14, + rARM_LR = r14lr, + r15pc = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15, + rARM_PC = r15pc, + + fr0 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0, + fr1 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1, + fr2 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2, + fr3 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 3, + fr4 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 4, + fr5 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5, + fr6 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6, + fr7 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7, + fr8 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8, + fr9 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9, + fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10, + fr11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11, + fr12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12, + fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13, + fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14, + fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15, + fr16 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16, + fr17 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17, + fr18 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18, + fr19 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19, + fr20 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20, + fr21 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21, + fr22 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22, + fr23 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 23, + fr24 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24, + fr25 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25, + fr26 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26, + fr27 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27, + fr28 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28, + fr29 = 
RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29, + fr30 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30, + fr31 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31, + + dr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0, + dr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1, + dr2 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2, + dr3 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 3, + dr4 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4, + dr5 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5, + dr6 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6, + dr7 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7, + dr8 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8, + dr9 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9, + dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10, + dr11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11, + dr12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12, + dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13, + dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14, + dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15, +#if 0 + // Enable when def/use and runtime able to handle these. + dr16 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16, + dr17 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 17, + dr18 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18, + dr19 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 19, + dr20 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20, + dr21 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 21, + dr22 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22, + dr23 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 23, + dr24 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24, + dr25 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 25, + dr26 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26, + dr27 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 27, + dr28 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28, + dr29 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 29, + dr30 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30, + dr31 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 31, +#endif +}; + +constexpr RegStorage rs_r0(RegStorage::kValid | r0); +constexpr RegStorage rs_r1(RegStorage::kValid | r1); +constexpr RegStorage rs_r2(RegStorage::kValid | r2); +constexpr RegStorage rs_r3(RegStorage::kValid | r3); +constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND); +constexpr RegStorage rs_r5(RegStorage::kValid | r5); +constexpr RegStorage rs_r6(RegStorage::kValid | r6); +constexpr RegStorage rs_r7(RegStorage::kValid | r7); +constexpr RegStorage rs_r8(RegStorage::kValid | r8); +constexpr RegStorage rs_rARM_SELF(RegStorage::kValid | rARM_SELF); +constexpr RegStorage rs_r10(RegStorage::kValid | r10); +constexpr RegStorage rs_r11(RegStorage::kValid | r11); +constexpr RegStorage rs_r12(RegStorage::kValid | r12); +constexpr RegStorage rs_r13sp(RegStorage::kValid | r13sp); +constexpr RegStorage rs_rARM_SP(RegStorage::kValid | rARM_SP); +constexpr RegStorage rs_r14lr(RegStorage::kValid | r14lr); +constexpr RegStorage rs_rARM_LR(RegStorage::kValid | rARM_LR); +constexpr RegStorage rs_r15pc(RegStorage::kValid | r15pc); +constexpr RegStorage rs_rARM_PC(RegStorage::kValid | rARM_PC); +constexpr RegStorage 
rs_invalid(RegStorage::kInvalid); + +constexpr RegStorage rs_fr0(RegStorage::kValid | fr0); +constexpr RegStorage rs_fr1(RegStorage::kValid | fr1); +constexpr RegStorage rs_fr2(RegStorage::kValid | fr2); +constexpr RegStorage rs_fr3(RegStorage::kValid | fr3); +constexpr RegStorage rs_fr4(RegStorage::kValid | fr4); +constexpr RegStorage rs_fr5(RegStorage::kValid | fr5); +constexpr RegStorage rs_fr6(RegStorage::kValid | fr6); +constexpr RegStorage rs_fr7(RegStorage::kValid | fr7); +constexpr RegStorage rs_fr8(RegStorage::kValid | fr8); +constexpr RegStorage rs_fr9(RegStorage::kValid | fr9); +constexpr RegStorage rs_fr10(RegStorage::kValid | fr10); +constexpr RegStorage rs_fr11(RegStorage::kValid | fr11); +constexpr RegStorage rs_fr12(RegStorage::kValid | fr12); +constexpr RegStorage rs_fr13(RegStorage::kValid | fr13); +constexpr RegStorage rs_fr14(RegStorage::kValid | fr14); +constexpr RegStorage rs_fr15(RegStorage::kValid | fr15); +constexpr RegStorage rs_fr16(RegStorage::kValid | fr16); +constexpr RegStorage rs_fr17(RegStorage::kValid | fr17); +constexpr RegStorage rs_fr18(RegStorage::kValid | fr18); +constexpr RegStorage rs_fr19(RegStorage::kValid | fr19); +constexpr RegStorage rs_fr20(RegStorage::kValid | fr20); +constexpr RegStorage rs_fr21(RegStorage::kValid | fr21); +constexpr RegStorage rs_fr22(RegStorage::kValid | fr22); +constexpr RegStorage rs_fr23(RegStorage::kValid | fr23); +constexpr RegStorage rs_fr24(RegStorage::kValid | fr24); +constexpr RegStorage rs_fr25(RegStorage::kValid | fr25); +constexpr RegStorage rs_fr26(RegStorage::kValid | fr26); +constexpr RegStorage rs_fr27(RegStorage::kValid | fr27); +constexpr RegStorage rs_fr28(RegStorage::kValid | fr28); +constexpr RegStorage rs_fr29(RegStorage::kValid | fr29); +constexpr RegStorage rs_fr30(RegStorage::kValid | fr30); +constexpr RegStorage rs_fr31(RegStorage::kValid | fr31); + +constexpr RegStorage rs_dr0(RegStorage::kValid | dr0); +constexpr RegStorage rs_dr1(RegStorage::kValid | dr1); +constexpr RegStorage rs_dr2(RegStorage::kValid | dr2); +constexpr RegStorage rs_dr3(RegStorage::kValid | dr3); +constexpr RegStorage rs_dr4(RegStorage::kValid | dr4); +constexpr RegStorage rs_dr5(RegStorage::kValid | dr5); +constexpr RegStorage rs_dr6(RegStorage::kValid | dr6); +constexpr RegStorage rs_dr7(RegStorage::kValid | dr7); +constexpr RegStorage rs_dr8(RegStorage::kValid | dr8); +constexpr RegStorage rs_dr9(RegStorage::kValid | dr9); +constexpr RegStorage rs_dr10(RegStorage::kValid | dr10); +constexpr RegStorage rs_dr11(RegStorage::kValid | dr11); +constexpr RegStorage rs_dr12(RegStorage::kValid | dr12); +constexpr RegStorage rs_dr13(RegStorage::kValid | dr13); +constexpr RegStorage rs_dr14(RegStorage::kValid | dr14); +constexpr RegStorage rs_dr15(RegStorage::kValid | dr15); +#if 0 +constexpr RegStorage rs_dr16(RegStorage::kValid | dr16); +constexpr RegStorage rs_dr17(RegStorage::kValid | dr17); +constexpr RegStorage rs_dr18(RegStorage::kValid | dr18); +constexpr RegStorage rs_dr19(RegStorage::kValid | dr19); +constexpr RegStorage rs_dr20(RegStorage::kValid | dr20); +constexpr RegStorage rs_dr21(RegStorage::kValid | dr21); +constexpr RegStorage rs_dr22(RegStorage::kValid | dr22); +constexpr RegStorage rs_dr23(RegStorage::kValid | dr23); +constexpr RegStorage rs_dr24(RegStorage::kValid | dr24); +constexpr RegStorage rs_dr25(RegStorage::kValid | dr25); +constexpr RegStorage rs_dr26(RegStorage::kValid | dr26); +constexpr RegStorage rs_dr27(RegStorage::kValid | dr27); +constexpr RegStorage rs_dr28(RegStorage::kValid | dr28); +constexpr 
RegStorage rs_dr29(RegStorage::kValid | dr29); +constexpr RegStorage rs_dr30(RegStorage::kValid | dr30); +constexpr RegStorage rs_dr31(RegStorage::kValid | dr31); +#endif + +// RegisterLocation templates return values (r0, or r0/r1). +const RegLocation arm_loc_c_return + {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, + RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG}; +const RegLocation arm_loc_c_return_wide + {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, + RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG}; +const RegLocation arm_loc_c_return_float + {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, + RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG}; +const RegLocation arm_loc_c_return_double + {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, + RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG}; + +enum ArmShiftEncodings { + kArmLsl = 0x0, + kArmLsr = 0x1, + kArmAsr = 0x2, + kArmRor = 0x3 +}; + +/* + * The following enum defines the list of supported Thumb instructions by the + * assembler. Their corresponding EncodingMap positions will be defined in + * Assemble.cc. + */ +enum ArmOpcode { + kArmFirst = 0, + kArm16BitData = kArmFirst, // DATA [0] rd[15..0]. + kThumbAdcRR, // adc [0100000101] rm[5..3] rd[2..0]. + kThumbAddRRI3, // add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0]. + kThumbAddRI8, // add(2) [00110] rd[10..8] imm_8[7..0]. + kThumbAddRRR, // add(3) [0001100] rm[8..6] rn[5..3] rd[2..0]. + kThumbAddRRLH, // add(4) [01000100] H12[01] rm[5..3] rd[2..0]. + kThumbAddRRHL, // add(4) [01001000] H12[10] rm[5..3] rd[2..0]. + kThumbAddRRHH, // add(4) [01001100] H12[11] rm[5..3] rd[2..0]. + kThumbAddPcRel, // add(5) [10100] rd[10..8] imm_8[7..0]. + kThumbAddSpRel, // add(6) [10101] rd[10..8] imm_8[7..0]. + kThumbAddSpI7, // add(7) [101100000] imm_7[6..0]. + kThumbAndRR, // and [0100000000] rm[5..3] rd[2..0]. + kThumbAsrRRI5, // asr(1) [00010] imm_5[10..6] rm[5..3] rd[2..0]. + kThumbAsrRR, // asr(2) [0100000100] rs[5..3] rd[2..0]. + kThumbBCond, // b(1) [1101] cond[11..8] offset_8[7..0]. + kThumbBUncond, // b(2) [11100] offset_11[10..0]. + kThumbBicRR, // bic [0100001110] rm[5..3] rd[2..0]. + kThumbBkpt, // bkpt [10111110] imm_8[7..0]. + kThumbBlx1, // blx(1) [111] H[10] offset_11[10..0]. + kThumbBlx2, // blx(1) [111] H[01] offset_11[10..0]. + kThumbBl1, // blx(1) [111] H[10] offset_11[10..0]. + kThumbBl2, // blx(1) [111] H[11] offset_11[10..0]. + kThumbBlxR, // blx(2) [010001111] rm[6..3] [000]. + kThumbBx, // bx [010001110] H2[6..6] rm[5..3] SBZ[000]. + kThumbCmnRR, // cmn [0100001011] rm[5..3] rd[2..0]. + kThumbCmpRI8, // cmp(1) [00101] rn[10..8] imm_8[7..0]. + kThumbCmpRR, // cmp(2) [0100001010] rm[5..3] rd[2..0]. + kThumbCmpLH, // cmp(3) [01000101] H12[01] rm[5..3] rd[2..0]. + kThumbCmpHL, // cmp(3) [01000110] H12[10] rm[5..3] rd[2..0]. + kThumbCmpHH, // cmp(3) [01000111] H12[11] rm[5..3] rd[2..0]. + kThumbEorRR, // eor [0100000001] rm[5..3] rd[2..0]. + kThumbLdmia, // ldmia [11001] rn[10..8] reglist [7..0]. + kThumbLdrRRI5, // ldr(1) [01101] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbLdrRRR, // ldr(2) [0101100] rm[8..6] rn[5..3] rd[2..0]. + kThumbLdrPcRel, // ldr(3) [01001] rd[10..8] imm_8[7..0]. + kThumbLdrSpRel, // ldr(4) [10011] rd[10..8] imm_8[7..0]. + kThumbLdrbRRI5, // ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbLdrbRRR, // ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0]. + kThumbLdrhRRI5, // ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbLdrhRRR, // ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0]. 
+ kThumbLdrsbRRR, // ldrsb [0101011] rm[8..6] rn[5..3] rd[2..0]. + kThumbLdrshRRR, // ldrsh [0101111] rm[8..6] rn[5..3] rd[2..0]. + kThumbLslRRI5, // lsl(1) [00000] imm_5[10..6] rm[5..3] rd[2..0]. + kThumbLslRR, // lsl(2) [0100000010] rs[5..3] rd[2..0]. + kThumbLsrRRI5, // lsr(1) [00001] imm_5[10..6] rm[5..3] rd[2..0]. + kThumbLsrRR, // lsr(2) [0100000011] rs[5..3] rd[2..0]. + kThumbMovImm, // mov(1) [00100] rd[10..8] imm_8[7..0]. + kThumbMovRR, // mov(2) [0001110000] rn[5..3] rd[2..0]. + kThumbMovRR_H2H, // mov(3) [01000111] H12[11] rm[5..3] rd[2..0]. + kThumbMovRR_H2L, // mov(3) [01000110] H12[01] rm[5..3] rd[2..0]. + kThumbMovRR_L2H, // mov(3) [01000101] H12[10] rm[5..3] rd[2..0]. + kThumbMul, // mul [0100001101] rm[5..3] rd[2..0]. + kThumbMvn, // mvn [0100001111] rm[5..3] rd[2..0]. + kThumbNeg, // neg [0100001001] rm[5..3] rd[2..0]. + kThumbOrr, // orr [0100001100] rm[5..3] rd[2..0]. + kThumbPop, // pop [1011110] r[8..8] rl[7..0]. + kThumbPush, // push [1011010] r[8..8] rl[7..0]. + kThumbRev, // rev [1011101000] rm[5..3] rd[2..0] + kThumbRevsh, // revsh [1011101011] rm[5..3] rd[2..0] + kThumbRorRR, // ror [0100000111] rs[5..3] rd[2..0]. + kThumbSbc, // sbc [0100000110] rm[5..3] rd[2..0]. + kThumbStmia, // stmia [11000] rn[10..8] reglist [7.. 0]. + kThumbStrRRI5, // str(1) [01100] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbStrRRR, // str(2) [0101000] rm[8..6] rn[5..3] rd[2..0]. + kThumbStrSpRel, // str(3) [10010] rd[10..8] imm_8[7..0]. + kThumbStrbRRI5, // strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbStrbRRR, // strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0]. + kThumbStrhRRI5, // strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbStrhRRR, // strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0]. + kThumbSubRRI3, // sub(1) [0001111] imm_3[8..6] rn[5..3] rd[2..0]*/ + kThumbSubRI8, // sub(2) [00111] rd[10..8] imm_8[7..0]. + kThumbSubRRR, // sub(3) [0001101] rm[8..6] rn[5..3] rd[2..0]. + kThumbSubSpI7, // sub(4) [101100001] imm_7[6..0]. + kThumbSwi, // swi [11011111] imm_8[7..0]. + kThumbTst, // tst [0100001000] rm[5..3] rn[2..0]. + kThumb2Vldrs, // vldr low sx [111011011001] rn[19..16] rd[15-12] [1010] imm_8[7..0]. + kThumb2Vldrd, // vldr low dx [111011011001] rn[19..16] rd[15-12] [1011] imm_8[7..0]. + kThumb2Vmuls, // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10100000] rm[3..0]. + kThumb2Vmuld, // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10110000] rm[3..0]. + kThumb2Vstrs, // vstr low sx [111011011000] rn[19..16] rd[15-12] [1010] imm_8[7..0]. + kThumb2Vstrd, // vstr low dx [111011011000] rn[19..16] rd[15-12] [1011] imm_8[7..0]. + kThumb2Vsubs, // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100040] rm[3..0]. + kThumb2Vsubd, // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110040] rm[3..0]. + kThumb2Vadds, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100000] rm[3..0]. + kThumb2Vaddd, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0]. + kThumb2Vdivs, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0]. + kThumb2Vdivd, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0]. + kThumb2VmlaF64, // vmla.F64 vd, vn, vm [111011100000] vn[19..16] vd[15..12] [10110000] vm[3..0]. + kThumb2VcvtIF, // vcvt.F32.S32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0]. + kThumb2VcvtFI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0]. + kThumb2VcvtDI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0]. 
+ kThumb2VcvtFd, // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0]. + kThumb2VcvtDF, // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0]. + kThumb2VcvtF64S32, // vcvt.F64.S32 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0]. + kThumb2VcvtF64U32, // vcvt.F64.U32 vd, vm [1110111010111000] vd[15..12] [10110100] vm[3..0]. + kThumb2Vsqrts, // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0]. + kThumb2Vsqrtd, // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0]. + kThumb2MovI8M, // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8. + kThumb2MovImm16, // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8. + kThumb2StrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0]. + kThumb2LdrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0]. + kThumb2StrRRI8Predec, // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0]. + kThumb2LdrRRI8Predec, // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0]. + kThumb2Cbnz, // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0]. + kThumb2Cbz, // cbn rd,<label> [101100] i [1] imm5[7..3] rn[2..0]. + kThumb2AddRRI12, // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2MovRR, // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0]. + kThumb2Vmovs, // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] 101001] M [0] vm[3..0]. + kThumb2Vmovd, // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] 101101] M [0] vm[3..0]. + kThumb2Ldmia, // ldmia [111010001001] rn[19..16] mask[15..0]. + kThumb2Stmia, // stmia [111010001000] rn[19..16] mask[15..0]. + kThumb2AddRRR, // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2SubRRR, // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2SbcRRR, // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2CmpRR, // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0]. + kThumb2SubRRI12, // sub rd, rn, #imm12 [11110] i [101010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2MvnI8M, // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8. + kThumb2Sel, // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0]. + kThumb2Ubfx, // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0]. + kThumb2Sbfx, // ubfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0]. + kThumb2LdrRRR, // ldr rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrhRRR, // ldrh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrshRRR, // ldrsh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrbRRR, // ldrb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrsbRRR, // ldrsb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2StrRRR, // str rt,[rn,rm,LSL #imm] [111110000100] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2StrhRRR, // str rt,[rn,rm,LSL #imm] [111110000010] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2StrbRRR, // str rt,[rn,rm,LSL #imm] [111110000000] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrhRRI12, // ldrh rt,[rn,#imm12] [111110001011] rt[15..12] rn[19..16] imm12[11..0]. 
+ kThumb2LdrshRRI12, // ldrsh rt,[rn,#imm12] [111110011011] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2LdrbRRI12, // ldrb rt,[rn,#imm12] [111110001001] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2LdrsbRRI12, // ldrsb rt,[rn,#imm12] [111110011001] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2StrhRRI12, // strh rt,[rn,#imm12] [111110001010] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2StrbRRI12, // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2Pop, // pop [1110100010111101] list[15-0]*/ + kThumb2Push, // push [1110100100101101] list[15-0]*/ + kThumb2CmpRI8M, // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0]. + kThumb2CmnRI8M, // cmn rn, #<const> [11110] i [010001] rn[19-16] [0] imm3 [1111] imm8[7..0]. + kThumb2AdcRRR, // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2AndRRR, // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2BicRRR, // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2CmnRR, // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0]. + kThumb2EorRRR, // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2MulRRR, // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2SdivRRR, // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0]. + kThumb2UdivRRR, // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0]. + kThumb2MnvRR, // mvn [11101010011011110] rd[11-8] [0000] rm[3..0]. + kThumb2RsubRRI8M, // rsb rd, rn, #<const> [11110] i [011101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2NegRR, // actually rsub rd, rn, #0. + kThumb2OrrRRR, // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2TstRR, // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0]. + kThumb2LslRRR, // lsl [111110100000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2LsrRRR, // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2AsrRRR, // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2RorRRR, // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2LslRRI5, // lsl [11101010010011110] imm[14.12] rd[11..8] [00] rm[3..0]. + kThumb2LsrRRI5, // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0]. + kThumb2AsrRRI5, // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0]. + kThumb2RorRRI5, // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0]. + kThumb2BicRRI8M, // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2AndRRI8M, // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2OrrRRI8M, // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2EorRRI8M, // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2AddRRI8M, // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2AdcRRI8M, // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2SubRRI8M, // sub rd, rn, #<const> [11110] i [011011] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2SbcRRI8M, // sub rd, rn, #<const> [11110] i [010111] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. 
+ kThumb2RevRR, // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0] + kThumb2RevshRR, // rev [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0] + kThumb2It, // it [10111111] firstcond[7-4] mask[3-0]. + kThumb2Fmstat, // fmstat [11101110111100011111101000010000]. + kThumb2Vcmpd, // vcmp [111011101] D [11011] rd[15-12] [1011] E [1] M [0] rm[3-0]. + kThumb2Vcmps, // vcmp [111011101] D [11010] rd[15-12] [1011] E [1] M [0] rm[3-0]. + kThumb2LdrPcRel12, // ldr rd,[pc,#imm12] [1111100011011111] rt[15-12] imm12[11-0]. + kThumb2BCond, // b<c> [1110] S cond[25-22] imm6[21-16] [10] J1 [0] J2 imm11[10..0]. + kThumb2Fmrs, // vmov [111011100000] vn[19-16] rt[15-12] [1010] N [0010000]. + kThumb2Fmsr, // vmov [111011100001] vn[19-16] rt[15-12] [1010] N [0010000]. + kThumb2Fmrrd, // vmov [111011000100] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0]. + kThumb2Fmdrr, // vmov [111011000101] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0]. + kThumb2Vabsd, // vabs.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0]. + kThumb2Vabss, // vabs.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0]. + kThumb2Vnegd, // vneg.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0]. + kThumb2Vnegs, // vneg.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0]. + kThumb2Vmovs_IMM8, // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0]. + kThumb2Vmovd_IMM8, // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0]. + kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0]. + kThumb2Umull, // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0]. + kThumb2Ldrex, // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0]. + kThumb2Ldrexd, // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111]. + kThumb2Strex, // strex [111010000100] rn[19-16] rt[15-12] rd[11-8] imm8[7-0]. + kThumb2Strexd, // strexd [111010001100] rn[19-16] rt[15-12] rt2[11-8] [0111] Rd[3-0]. + kThumb2Clrex, // clrex [11110011101111111000111100101111]. + kThumb2Bfi, // bfi [111100110110] rn[19-16] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0]. + kThumb2Bfc, // bfc [11110011011011110] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0]. + kThumb2Dmb, // dmb [1111001110111111100011110101] option[3-0]. + kThumb2LdrPcReln12, // ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12] imm12[11-0]. + kThumb2Stm, // stm <list> [111010010000] rn[19-16] 000 rl[12-0]. + kThumbUndefined, // undefined [11011110xxxxxxxx]. + kThumb2VPopCS, // vpop <list of callee save fp singles (s16+). + kThumb2VPushCS, // vpush <list callee save fp singles (s16+). + kThumb2Vldms, // vldms rd, <list>. + kThumb2Vstms, // vstms rd, <list>. + kThumb2BUncond, // b <label>. + kThumb2MovImm16H, // similar to kThumb2MovImm16, but target high hw. + kThumb2AddPCR, // Thumb2 2-operand add with hard-coded PC target. + kThumb2Adr, // Special purpose encoding of ADR for switch tables. + kThumb2MovImm16LST, // Special purpose version for switch table use. + kThumb2MovImm16HST, // Special purpose version for switch table use. + kThumb2LdmiaWB, // ldmia [111010011001[ rn[19..16] mask[15..0]. + kThumb2OrrRRRs, // orrs [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2Push1, // t3 encoding of push. + kThumb2Pop1, // t3 encoding of pop. + kThumb2RsubRRR, // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2Smull, // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0]. + kThumb2LdrdPcRel8, // ldrd rt, rt2, pc +-/1024. 
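The comments on these opcodes document each instruction's bit layout (for kThumbAddRRI3: add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0]), and the corresponding EncodingMap entry later in this change pairs that layout with its skeleton, 0x1c00, and kFmtBitBlt end/start positions (2,0 / 5,3 / 8,6). A stand-alone sketch of how such a 16-bit encoding is assembled from those pieces (PackBits and EncodeThumbAddRRI3 are hypothetical helpers written for this example, not the backend's assembler path):

```cpp
// Illustrative sketch only: OR operand fields into a 16-bit Thumb skeleton at
// the documented end/start bit positions. Not part of the ART sources.
#include <cstdint>

namespace {

// Insert 'value' into bits [end..start] of 'skeleton' (end >= start).
constexpr std::uint16_t PackBits(std::uint16_t skeleton, int end, int start,
                                 std::uint32_t value) {
  return static_cast<std::uint16_t>(
      skeleton | ((value & ((1u << (end - start + 1)) - 1u)) << start));
}

// kThumbAddRRI3: add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0], skeleton 0x1c00.
constexpr std::uint16_t EncodeThumbAddRRI3(std::uint32_t rd, std::uint32_t rn,
                                           std::uint32_t imm3) {
  return PackBits(PackBits(PackBits(0x1c00, 2, 0, rd), 5, 3, rn), 8, 6, imm3);
}

// adds r0, r1, #2  ->  0001110 010 001 000  ->  0x1c88.
static_assert(EncodeThumbAddRRI3(0, 1, 2) == 0x1c88, "unexpected Thumb ADD(1) encoding");

}  // namespace
```

The opcode enum and its encoding table continue in the diff below.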
+ kThumb2LdrdI8, // ldrd rt, rt2, [rn +-/1024]. + kThumb2StrdI8, // strd rt, rt2, [rn +-/1024]. + kArmLast, +}; + +enum ArmOpDmbOptions { + kSY = 0xf, + kST = 0xe, + kISH = 0xb, + kISHST = 0xa, + kNSH = 0x7, + kNSHST = 0x6 +}; + +// Instruction assembly field_loc kind. +enum ArmEncodingKind { + kFmtUnused, // Unused field and marks end of formats. + kFmtBitBlt, // Bit string using end/start. + kFmtDfp, // Double FP reg. + kFmtSfp, // Single FP reg. + kFmtModImm, // Shifted 8-bit immed using [26,14..12,7..0]. + kFmtImm16, // Zero-extended immed using [26,19..16,14..12,7..0]. + kFmtImm6, // Encoded branch target using [9,7..3]0. + kFmtImm12, // Zero-extended immediate using [26,14..12,7..0]. + kFmtShift, // Shift descriptor, [14..12,7..4]. + kFmtLsb, // least significant bit using [14..12][7..6]. + kFmtBWidth, // bit-field width, encoded as width-1. + kFmtShift5, // Shift count, [14..12,7..6]. + kFmtBrOffset, // Signed extended [26,11,13,21-16,10-0]:0. + kFmtFPImm, // Encoded floating point immediate. + kFmtOff24, // 24-bit Thumb2 unconditional branch encoding. + kFmtSkip, // Unused field, but continue to next. +}; + +// Struct used to define the snippet positions for each Thumb opcode. +struct ArmEncodingMap { + uint32_t skeleton; + struct { + ArmEncodingKind kind; + int end; // end for kFmtBitBlt, 1-bit slice end for FP regs. + int start; // start for kFmtBitBlt, 4-bit slice end for FP regs. + } field_loc[4]; + ArmOpcode opcode; + uint64_t flags; + const char* name; + const char* fmt; + int size; // Note: size is in bytes. + FixupKind fixup; +}; + +} // namespace art + +#endif // ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_ diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc new file mode 100644 index 000000000..e79ebad00 --- /dev/null +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -0,0 +1,1682 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm64_lir.h" +#include "codegen_arm64.h" +#include "dex/quick/mir_to_lir-inl.h" + +namespace art { + +/* + * opcode: ArmOpcode enum + * skeleton: pre-designated bit-pattern for this opcode + * k0: key to applying ds/de + * ds: dest start bit position + * de: dest end bit position + * k1: key to applying s1s/s1e + * s1s: src1 start bit position + * s1e: src1 end bit position + * k2: key to applying s2s/s2e + * s2s: src2 start bit position + * s2e: src2 end bit position + * operands: number of operands (for sanity check purposes) + * name: mnemonic name + * fmt: for pretty-printing + */ +#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \ + k3, k3s, k3e, flags, name, fmt, size, fixup) \ + {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \ + {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup} + +/* Instruction dump string format keys: !pf, where "!" is the start + * of the key, "p" is which numeric operand to use and "f" is the + * print format. 
+ * + * [p]ositions: + * 0 -> operands[0] (dest) + * 1 -> operands[1] (src1) + * 2 -> operands[2] (src2) + * 3 -> operands[3] (extra) + * + * [f]ormats: + * h -> 4-digit hex + * d -> decimal + * E -> decimal*4 + * F -> decimal*2 + * c -> branch condition (beq, bne, etc.) + * t -> pc-relative target + * u -> 1st half of bl[x] target + * v -> 2nd half ob bl[x] target + * R -> register list + * s -> single precision floating point register + * S -> double precision floating point register + * m -> Thumb2 modified immediate + * n -> complimented Thumb2 modified immediate + * M -> Thumb2 16-bit zero-extended immediate + * b -> 4-digit binary + * B -> dmb option string (sy, st, ish, ishst, nsh, hshst) + * H -> operand shift + * C -> core register name + * P -> fp cs register list (base of s16) + * Q -> fp cs register list (base of s0) + * + * [!] escape. To insert "!", use "!!" + */ +/* NOTE: must be kept in sync with enum ArmOpcode from LIR.h */ +const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kArmLast] = { + ENCODING_MAP(kArm16BitData, 0x0000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone), + ENCODING_MAP(kThumbAdcRR, 0x4140, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES, + "adcs", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbAddRRI3, 0x1c00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "adds", "!0C, !1C, #!2d", 2, kFixupNone), + ENCODING_MAP(kThumbAddRI8, 0x3000, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, + "adds", "!0C, !0C, #!1d", 2, kFixupNone), + ENCODING_MAP(kThumbAddRRR, 0x1800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, + "adds", "!0C, !1C, !2C", 2, kFixupNone), + ENCODING_MAP(kThumbAddRRLH, 0x4440, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, + "add", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbAddRRHL, 0x4480, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, + "add", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbAddRRHH, 0x44c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, + "add", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbAddPcRel, 0xa000, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP, + "add", "!0C, pc, #!1E", 2, kFixupLoad), + ENCODING_MAP(kThumbAddSpRel, 0xa800, + kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP, + "add", "!0C, sp, #!2E", 2, kFixupNone), + ENCODING_MAP(kThumbAddSpI7, 0xb000, + kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, + "add", "sp, #!0d*4", 2, kFixupNone), + ENCODING_MAP(kThumbAndRR, 0x4000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "ands", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbAsrRRI5, 0x1000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, 
+ "asrs", "!0C, !1C, #!2d", 2, kFixupNone), + ENCODING_MAP(kThumbAsrRR, 0x4100, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "asrs", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbBCond, 0xd000, + kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES | + NEEDS_FIXUP, "b!1c", "!0t", 2, kFixupCondBranch), + ENCODING_MAP(kThumbBUncond, 0xe000, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, + "b", "!0t", 2, kFixupT1Branch), + ENCODING_MAP(kThumbBicRR, 0x4380, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "bics", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbBkpt, 0xbe00, + kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, + "bkpt", "!0d", 2, kFixupNone), + ENCODING_MAP(kThumbBlx1, 0xf000, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | + NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1), + ENCODING_MAP(kThumbBlx2, 0xe800, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | + NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel), + ENCODING_MAP(kThumbBl1, 0xf000, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, + "bl_1", "!0u", 2, kFixupBl1), + ENCODING_MAP(kThumbBl2, 0xf800, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, + "bl_2", "!0v", 2, kFixupLabel), + ENCODING_MAP(kThumbBlxR, 0x4780, + kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR, + "blx", "!0C", 2, kFixupNone), + ENCODING_MAP(kThumbBx, 0x4700, + kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, + "bx", "!0C", 2, kFixupNone), + ENCODING_MAP(kThumbCmnRR, 0x42c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmn", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbCmpRI8, 0x2800, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES, + "cmp", "!0C, #!1d", 2, kFixupNone), + ENCODING_MAP(kThumbCmpRR, 0x4280, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbCmpLH, 0x4540, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbCmpHL, 0x4580, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbCmpHH, 0x45c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbEorRR, 0x4040, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "eors", "!0C, 
!1C", 2, kFixupNone), + ENCODING_MAP(kThumbLdmia, 0xc800, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, + "ldmia", "!0C!!, <!1R>", 2, kFixupNone), + ENCODING_MAP(kThumbLdrRRI5, 0x6800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone), + ENCODING_MAP(kThumbLdrRRR, 0x5800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldr", "!0C, [!1C, !2C]", 2, kFixupNone), + ENCODING_MAP(kThumbLdrPcRel, 0x4800, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC + | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad), + ENCODING_MAP(kThumbLdrSpRel, 0x9800, + kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP + | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone), + ENCODING_MAP(kThumbLdrbRRI5, 0x7800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone), + ENCODING_MAP(kThumbLdrbRRR, 0x5c00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone), + ENCODING_MAP(kThumbLdrhRRI5, 0x8800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone), + ENCODING_MAP(kThumbLdrhRRR, 0x5a00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone), + ENCODING_MAP(kThumbLdrsbRRR, 0x5600, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone), + ENCODING_MAP(kThumbLdrshRRR, 0x5e00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone), + ENCODING_MAP(kThumbLslRRI5, 0x0000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "lsls", "!0C, !1C, #!2d", 2, kFixupNone), + ENCODING_MAP(kThumbLslRR, 0x4080, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "lsls", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbLsrRRI5, 0x0800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "lsrs", "!0C, !1C, #!2d", 2, kFixupNone), + ENCODING_MAP(kThumbLsrRR, 0x40c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "lsrs", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbMovImm, 0x2000, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0 | SETS_CCODES, + "movs", "!0C, #!1d", 2, kFixupNone), + ENCODING_MAP(kThumbMovRR, 0x1c00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "movs", "!0C, !1C", 2, kFixupNone), + 
ENCODING_MAP(kThumbMovRR_H2H, 0x46c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbMovRR_H2L, 0x4640, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbMovRR_L2H, 0x4680, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbMul, 0x4340, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "muls", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbMvn, 0x43c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "mvns", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbNeg, 0x4240, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "negs", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbOrr, 0x4300, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "orrs", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbPop, 0xbc00, + kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 + | IS_LOAD, "pop", "<!0R>", 2, kFixupNone), + ENCODING_MAP(kThumbPush, 0xb400, + kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 + | IS_STORE, "push", "<!0R>", 2, kFixupNone), + ENCODING_MAP(kThumbRev, 0xba00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1, + "rev", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbRevsh, 0xbac0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1, + "rev", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbRorRR, 0x41c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "rors", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbSbc, 0x4180, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES, + "sbcs", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbStmia, 0xc000, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE, + "stmia", "!0C!!, <!1R>", 2, kFixupNone), + ENCODING_MAP(kThumbStrRRI5, 0x6000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "str", "!0C, [!1C, #!2E]", 2, kFixupNone), + ENCODING_MAP(kThumbStrRRR, 0x5000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, + "str", "!0C, [!1C, !2C]", 2, kFixupNone), + ENCODING_MAP(kThumbStrSpRel, 0x9000, + kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP + | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone), + ENCODING_MAP(kThumbStrbRRI5, 0x7000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 
| IS_STORE, + "strb", "!0C, [!1C, #!2d]", 2, kFixupNone), + ENCODING_MAP(kThumbStrbRRR, 0x5400, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, + "strb", "!0C, [!1C, !2C]", 2, kFixupNone), + ENCODING_MAP(kThumbStrhRRI5, 0x8000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "strh", "!0C, [!1C, #!2F]", 2, kFixupNone), + ENCODING_MAP(kThumbStrhRRR, 0x5200, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, + "strh", "!0C, [!1C, !2C]", 2, kFixupNone), + ENCODING_MAP(kThumbSubRRI3, 0x1e00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "subs", "!0C, !1C, #!2d", 2, kFixupNone), + ENCODING_MAP(kThumbSubRI8, 0x3800, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, + "subs", "!0C, #!1d", 2, kFixupNone), + ENCODING_MAP(kThumbSubRRR, 0x1a00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, + "subs", "!0C, !1C, !2C", 2, kFixupNone), + ENCODING_MAP(kThumbSubSpI7, 0xb080, + kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, + "sub", "sp, #!0d*4", 2, kFixupNone), + ENCODING_MAP(kThumbSwi, 0xdf00, + kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, + "swi", "!0d", 2, kFixupNone), + ENCODING_MAP(kThumbTst, 0x4200, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES, + "tst", "!0C, !1C", 2, kFixupNone), + /* + * Note: The encoding map entries for vldrd and vldrs include REG_DEF_LR, even though + * these instructions don't define lr. The reason is that these instructions + * are used for loading values from the literal pool, and the displacement may be found + * to be insuffient at assembly time. In that case, we need to materialize a new base + * register - and will use lr as the temp register. This works because lr is used as + * a temp register in very limited situations, and never in conjunction with a floating + * point constant load. However, it is possible that during instruction scheduling, + * another use of lr could be moved across a vldrd/vldrs. By setting REG_DEF_LR, we + * prevent that from happening. Note that we set REG_DEF_LR on all vldrd/vldrs - even those + * not used in a pc-relative case. It is really only needed on the pc-relative loads, but + * the case we're handling is rare enough that it seemed not worth the trouble to distinguish. 
+ */ + ENCODING_MAP(kThumb2Vldrs, 0xed900a00, + kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | + REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad), + ENCODING_MAP(kThumb2Vldrd, 0xed900b00, + kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | + REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad), + ENCODING_MAP(kThumb2Vmuls, 0xee200a00, + kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12, + "vmuls", "!0s, !1s, !2s", 4, kFixupNone), + ENCODING_MAP(kThumb2Vmuld, 0xee200b00, + kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vmuld", "!0S, !1S, !2S", 4, kFixupNone), + ENCODING_MAP(kThumb2Vstrs, 0xed800a00, + kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone), + ENCODING_MAP(kThumb2Vstrd, 0xed800b00, + kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone), + ENCODING_MAP(kThumb2Vsubs, 0xee300a40, + kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vsub", "!0s, !1s, !2s", 4, kFixupNone), + ENCODING_MAP(kThumb2Vsubd, 0xee300b40, + kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vsub", "!0S, !1S, !2S", 4, kFixupNone), + ENCODING_MAP(kThumb2Vadds, 0xee300a00, + kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vadd", "!0s, !1s, !2s", 4, kFixupNone), + ENCODING_MAP(kThumb2Vaddd, 0xee300b00, + kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vadd", "!0S, !1S, !2S", 4, kFixupNone), + ENCODING_MAP(kThumb2Vdivs, 0xee800a00, + kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vdivs", "!0s, !1s, !2s", 4, kFixupNone), + ENCODING_MAP(kThumb2Vdivd, 0xee800b00, + kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vdivd", "!0S, !1S, !2S", 4, kFixupNone), + ENCODING_MAP(kThumb2VmlaF64, 0xee000b00, + kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE012, + "vmla", "!0S, !1S, !2S", 4, kFixupNone), + ENCODING_MAP(kThumb2VcvtIF, 0xeeb80ac0, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.f32.s32", "!0s, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2VcvtFI, 0xeebd0ac0, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2VcvtDI, 0xeebd0bc0, + kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone), + ENCODING_MAP(kThumb2VcvtFd, 0xeeb70ac0, + kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2VcvtDF, 0xeeb70bc0, + kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.f32.f64 ", "!0s, 
!1S", 4, kFixupNone), + ENCODING_MAP(kThumb2VcvtF64S32, 0xeeb80bc0, + kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.f64.s32 ", "!0S, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2VcvtF64U32, 0xeeb80b40, + kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.f64.u32 ", "!0S, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2Vsqrts, 0xeeb10ac0, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2Vsqrtd, 0xeeb10bc0, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone), + ENCODING_MAP(kThumb2MovI8M, 0xf04f0000, /* no setflags encoding */ + kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "mov", "!0C, #!1m", 4, kFixupNone), + ENCODING_MAP(kThumb2MovImm16, 0xf2400000, + kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "mov", "!0C, #!1M", 4, kFixupNone), + ENCODING_MAP(kThumb2StrRRI12, 0xf8c00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "str", "!0C, [!1C, #!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrRRI12, 0xf8d00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2StrRRI8Predec, 0xf8400c00, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "str", "!0C, [!1C, #-!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrRRI8Predec, 0xf8500c00, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2Cbnz, 0xb900, /* Note: does not affect flags */ + kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | + NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ), + ENCODING_MAP(kThumb2Cbz, 0xb100, /* Note: does not affect flags */ + kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | + NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ), + ENCODING_MAP(kThumb2AddRRI12, 0xf2000000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ + "add", "!0C,!1C,#!2d", 4, kFixupNone), + ENCODING_MAP(kThumb2MovRR, 0xea4f0000, /* no setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov", "!0C, !1C", 4, kFixupNone), + ENCODING_MAP(kThumb2Vmovs, 0xeeb00a40, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vmov.f32 ", " !0s, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2Vmovd, 0xeeb00b40, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vmov.f64 ", " !0S, !1S", 4, kFixupNone), + ENCODING_MAP(kThumb2Ldmia, 0xe8900000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, 
+ "ldmia", "!0C!!, <!1R>", 4, kFixupNone), + ENCODING_MAP(kThumb2Stmia, 0xe8800000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE, + "stmia", "!0C!!, <!1R>", 4, kFixupNone), + ENCODING_MAP(kThumb2AddRRR, 0xeb100000, /* setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "adds", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2SubRRR, 0xebb00000, /* setflags enconding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "subs", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2SbcRRR, 0xeb700000, /* setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES, + "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2CmpRR, 0xebb00f00, + kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0C, !1C", 4, kFixupNone), + ENCODING_MAP(kThumb2SubRRI12, 0xf2a00000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ + "sub", "!0C,!1C,#!2d", 4, kFixupNone), + ENCODING_MAP(kThumb2MvnI8M, 0xf06f0000, /* no setflags encoding */ + kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "mvn", "!0C, #!1n", 4, kFixupNone), + ENCODING_MAP(kThumb2Sel, 0xfaa0f080, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES, + "sel", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2Ubfx, 0xf3c00000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, + kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, + "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone), + ENCODING_MAP(kThumb2Sbfx, 0xf3400000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, + kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, + "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrRRR, 0xf8500000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrhRRR, 0xf8300000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrshRRR, 0xf9300000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrbRRR, 0xf8100000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrsbRRR, 0xf9100000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), + ENCODING_MAP(kThumb2StrRRR, 0xf8400000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, + "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), + 
ENCODING_MAP(kThumb2StrhRRR, 0xf8200000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, + "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), + ENCODING_MAP(kThumb2StrbRRR, 0xf8000000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, + "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrhRRI12, 0xf8b00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrshRRI12, 0xf9b00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrbRRI12, 0xf8900000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrsbRRI12, 0xf9900000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2StrhRRI12, 0xf8a00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "strh", "!0C, [!1C, #!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2StrbRRI12, 0xf8800000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "strb", "!0C, [!1C, #!2d]", 4, kFixupNone), + ENCODING_MAP(kThumb2Pop, 0xe8bd0000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 + | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop), + ENCODING_MAP(kThumb2Push, 0xe92d0000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 + | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop), + ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00, + kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_USE0 | SETS_CCODES, + "cmp", "!0C, #!1m", 4, kFixupNone), + ENCODING_MAP(kThumb2CmnRI8M, 0xf1100f00, + kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_USE0 | SETS_CCODES, + "cmn", "!0C, #!1m", 4, kFixupNone), + ENCODING_MAP(kThumb2AdcRRR, 0xeb500000, /* setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2AndRRR, 0xea000000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "and", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2BicRRR, 0xea200000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "bic", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2CmnRR, 0xeb000000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "cmn", "!0C, !1C, shift !2d", 4, kFixupNone), + ENCODING_MAP(kThumb2EorRRR, 0xea800000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, 
kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "eor", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2MulRRR, 0xfb00f000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mul", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2SdivRRR, 0xfb90f0f0, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sdiv", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2UdivRRR, 0xfbb0f0f0, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "udiv", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2MnvRR, 0xea6f0000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "mvn", "!0C, !1C, shift !2d", 4, kFixupNone), + ENCODING_MAP(kThumb2RsubRRI8M, 0xf1d00000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "rsbs", "!0C,!1C,#!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsub */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "neg", "!0C,!1C", 4, kFixupNone), + ENCODING_MAP(kThumb2OrrRRR, 0xea400000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "orr", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2TstRR, 0xea100f00, + kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, + "tst", "!0C, !1C, shift !2d", 4, kFixupNone), + ENCODING_MAP(kThumb2LslRRR, 0xfa00f000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "lsl", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2LsrRRR, 0xfa20f000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "lsr", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2AsrRRR, 0xfa40f000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "asr", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2RorRRR, 0xfa60f000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "ror", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2LslRRI5, 0xea4f0000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "lsl", "!0C, !1C, #!2d", 4, kFixupNone), + ENCODING_MAP(kThumb2LsrRRI5, 0xea4f0010, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "lsr", "!0C, !1C, #!2d", 4, kFixupNone), + ENCODING_MAP(kThumb2AsrRRI5, 0xea4f0020, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "asr", "!0C, !1C, #!2d", 4, kFixupNone), + ENCODING_MAP(kThumb2RorRRI5, 0xea4f0030, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "ror", "!0C, !1C, #!2d", 4, kFixupNone), + ENCODING_MAP(kThumb2BicRRI8M, 0xf0200000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "bic", "!0C, !1C, #!2m", 4, 
kFixupNone), + ENCODING_MAP(kThumb2AndRRI8M, 0xf0000000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "and", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2OrrRRI8M, 0xf0400000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "orr", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2EorRRI8M, 0xf0800000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "eor", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2AddRRI8M, 0xf1100000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "adds", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2AdcRRI8M, 0xf1500000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, + "adcs", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2SubRRI8M, 0xf1b00000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "subs", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2SbcRRI8M, 0xf1700000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, + "sbcs", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2RevRR, 0xfa90f080, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice. + "rev", "!0C, !1C", 4, kFixupNone), + ENCODING_MAP(kThumb2RevshRR, 0xfa90f0b0, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice. 
+ "revsh", "!0C, !1C", 4, kFixupNone), + ENCODING_MAP(kThumb2It, 0xbf00, + kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES, + "it:!1b", "!0c", 2, kFixupNone), + ENCODING_MAP(kThumb2Fmstat, 0xeef1fa10, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES, + "fmstat", "", 4, kFixupNone), + ENCODING_MAP(kThumb2Vcmpd, 0xeeb40b40, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, + "vcmp.f64", "!0S, !1S", 4, kFixupNone), + ENCODING_MAP(kThumb2Vcmps, 0xeeb40a40, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, + "vcmp.f32", "!0s, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrPcRel12, 0xf8df0000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, + "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad), + ENCODING_MAP(kThumb2BCond, 0xf0008000, + kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP, + "b!1c", "!0t", 4, kFixupCondBranch), + ENCODING_MAP(kThumb2Fmrs, 0xee100a10, + kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "fmrs", "!0C, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2Fmsr, 0xee000a10, + kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "fmsr", "!0s, !1C", 4, kFixupNone), + ENCODING_MAP(kThumb2Fmrrd, 0xec500b10, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2, + "fmrrd", "!0C, !1C, !2S", 4, kFixupNone), + ENCODING_MAP(kThumb2Fmdrr, 0xec400b10, + kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "fmdrr", "!0S, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2Vabsd, 0xeeb00bc0, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vabs.f64", "!0S, !1S", 4, kFixupNone), + ENCODING_MAP(kThumb2Vabss, 0xeeb00ac0, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vabs.f32", "!0s, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2Vnegd, 0xeeb10b40, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vneg.f64", "!0S, !1S", 4, kFixupNone), + ENCODING_MAP(kThumb2Vnegs, 0xeeb10a40, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vneg.f32", "!0s, !1s", 4, kFixupNone), + ENCODING_MAP(kThumb2Vmovs_IMM8, 0xeeb00a00, + kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "vmov.f32", "!0s, #0x!1h", 4, kFixupNone), + ENCODING_MAP(kThumb2Vmovd_IMM8, 0xeeb00b00, + kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "vmov.f64", "!0S, #0x!1h", 4, kFixupNone), + ENCODING_MAP(kThumb2Mla, 0xfb000000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123, + "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone), + ENCODING_MAP(kThumb2Umull, 0xfba00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 3, 0, + IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, + "umull", "!0C, 
!1C, !2C, !3C", 4, kFixupNone), + ENCODING_MAP(kThumb2Ldrex, 0xe8500f00, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone), + ENCODING_MAP(kThumb2Ldrexd, 0xe8d0007f, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOAD, + "ldrexd", "!0C, !1C, [!2C]", 4, kFixupNone), + ENCODING_MAP(kThumb2Strex, 0xe8400000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, + kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE, + "strex", "!0C, !1C, [!2C, #!2E]", 4, kFixupNone), + ENCODING_MAP(kThumb2Strexd, 0xe8c00070, + kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, + kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STORE, + "strexd", "!0C, !1C, !2C, [!3C]", 4, kFixupNone), + ENCODING_MAP(kThumb2Clrex, 0xf3bf8f2f, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND, + "clrex", "", 4, kFixupNone), + ENCODING_MAP(kThumb2Bfi, 0xf3600000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1, + kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, + "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone), + ENCODING_MAP(kThumb2Bfc, 0xf36f0000, + kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0, + "bfc", "!0C,#!1d,#!2d", 4, kFixupNone), + ENCODING_MAP(kThumb2Dmb, 0xf3bf8f50, + kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP, + "dmb", "#!0B", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrPcReln12, 0xf85f0000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD, + "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone), + ENCODING_MAP(kThumb2Stm, 0xe9000000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE, + "stm", "!0C, <!1R>", 4, kFixupNone), + ENCODING_MAP(kThumbUndefined, 0xde00, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND, + "undefined", "", 2, kFixupNone), + // NOTE: vpop, vpush hard-encoded for s16+ reg list + ENCODING_MAP(kThumb2VPopCS, 0xecbd8a00, + kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0 + | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone), + ENCODING_MAP(kThumb2VPushCS, 0xed2d8a00, + kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0 + | IS_STORE, "vpush", "<!0P>", 4, kFixupNone), + ENCODING_MAP(kThumb2Vldms, 0xec900a00, + kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2 + | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone), + ENCODING_MAP(kThumb2Vstms, 0xec800a00, + kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2 + | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone), + ENCODING_MAP(kThumb2BUncond, 0xf0009000, + kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, + "b", "!0t", 4, kFixupT2Branch), + ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000, + kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0, + "movt", "!0C, #!1M", 
4, kFixupNone), + ENCODING_MAP(kThumb2AddPCR, 0x4487, + kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP, + "add", "rPC, !0C", 2, kFixupLabel), + ENCODING_MAP(kThumb2Adr, 0xf20f0000, + kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + /* Note: doesn't affect flags */ + IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP, + "adr", "!0C,#!1d", 4, kFixupAdr), + ENCODING_MAP(kThumb2MovImm16LST, 0xf2400000, + kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP, + "mov", "!0C, #!1M", 4, kFixupMovImmLST), + ENCODING_MAP(kThumb2MovImm16HST, 0xf2c00000, + kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP, + "movt", "!0C, #!1M", 4, kFixupMovImmHST), + ENCODING_MAP(kThumb2LdmiaWB, 0xe8b00000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, + "ldmia", "!0C!!, <!1R>", 4, kFixupNone), + ENCODING_MAP(kThumb2OrrRRRs, 0xea500000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2Push1, 0xf84d0d04, + kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0 + | IS_STORE, "push1", "!0C", 4, kFixupNone), + ENCODING_MAP(kThumb2Pop1, 0xf85d0b04, + kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0 + | IS_LOAD, "pop1", "!0C", 4, kFixupNone), + ENCODING_MAP(kThumb2RsubRRR, 0xebd00000, /* setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone), + ENCODING_MAP(kThumb2Smull, 0xfb800000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 3, 0, + IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, + "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone), + ENCODING_MAP(kThumb2LdrdPcRel8, 0xe9df0000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, + "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad), + ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 7, 0, + IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD, + "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone), + ENCODING_MAP(kThumb2StrdI8, 0xe9c00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 7, 0, + IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE, + "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone), +}; + +// new_lir replaces orig_lir in the pcrel_fixup list. +void Arm64Mir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { + new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next; + if (UNLIKELY(prev_lir == NULL)) { + first_fixup_ = new_lir; + } else { + prev_lir->u.a.pcrel_next = new_lir; + } + orig_lir->flags.fixup = kFixupNone; +} + +// new_lir is inserted before orig_lir in the pcrel_fixup list. 
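+// Unlike ReplaceFixup above, orig_lir keeps its fixup kind and stays in the list;
+// only the chain link from prev_lir (or first_fixup_) is redirected through new_lir.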
+void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { + new_lir->u.a.pcrel_next = orig_lir; + if (UNLIKELY(prev_lir == NULL)) { + first_fixup_ = new_lir; + } else { + DCHECK(prev_lir->u.a.pcrel_next == orig_lir); + prev_lir->u.a.pcrel_next = new_lir; + } +} + +/* + * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is + * not ready. Since r5FP is not updated often, it is less likely to + * generate unnecessary stall cycles. + * TUNING: No longer true - find new NOP pattern. + */ +#define PADDING_MOV_R5_R5 0x1C2D + +uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { + for (; lir != NULL; lir = NEXT_LIR(lir)) { + if (!lir->flags.is_nop) { + int opcode = lir->opcode; + if (IsPseudoLirOp(opcode)) { + if (UNLIKELY(opcode == kPseudoPseudoAlign4)) { + // Note: size for this opcode will be either 0 or 2 depending on final alignment. + if (lir->offset & 0x2) { + write_pos[0] = (PADDING_MOV_R5_R5 & 0xff); + write_pos[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff); + write_pos += 2; + } + } + } else if (LIKELY(!lir->flags.is_nop)) { + const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; + uint32_t bits = encoder->skeleton; + for (int i = 0; i < 4; i++) { + uint32_t operand; + uint32_t value; + operand = lir->operands[i]; + ArmEncodingKind kind = encoder->field_loc[i].kind; + if (LIKELY(kind == kFmtBitBlt)) { + value = (operand << encoder->field_loc[i].start) & + ((1 << (encoder->field_loc[i].end + 1)) - 1); + bits |= value; + } else { + switch (encoder->field_loc[i].kind) { + case kFmtSkip: + break; // Nothing to do, but continue to next. + case kFmtUnused: + i = 4; // Done, break out of the enclosing loop. + break; + case kFmtFPImm: + value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end; + value |= (operand & 0x0F) << encoder->field_loc[i].start; + bits |= value; + break; + case kFmtBrOffset: + value = ((operand & 0x80000) >> 19) << 26; + value |= ((operand & 0x40000) >> 18) << 11; + value |= ((operand & 0x20000) >> 17) << 13; + value |= ((operand & 0x1f800) >> 11) << 16; + value |= (operand & 0x007ff); + bits |= value; + break; + case kFmtShift5: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtShift: + value = ((operand & 0x70) >> 4) << 12; + value |= (operand & 0x0f) << 4; + bits |= value; + break; + case kFmtBWidth: + value = operand - 1; + bits |= value; + break; + case kFmtLsb: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtImm6: + value = ((operand & 0x20) >> 5) << 9; + value |= (operand & 0x1f) << 3; + bits |= value; + break; + case kFmtDfp: { + DCHECK(RegStorage::IsDouble(operand)) << ", Operand = 0x" << std::hex << operand; + uint32_t reg_num = RegStorage::RegNum(operand); + /* Snag the 1-bit slice and position it */ + value = ((reg_num & 0x10) >> 4) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= (reg_num & 0x0f) << encoder->field_loc[i].start; + bits |= value; + break; + } + case kFmtSfp: { + DCHECK(RegStorage::IsSingle(operand)) << ", Operand = 0x" << std::hex << operand; + uint32_t reg_num = RegStorage::RegNum(operand); + /* Snag the 1-bit slice and position it */ + value = (reg_num & 0x1) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= ((reg_num & 0x1e) >> 1) << encoder->field_loc[i].start; + bits |= value; + break; + } + case kFmtImm12: + case kFmtModImm: + value = ((operand & 0x800) >> 11) << 26; + value |= 
((operand & 0x700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtImm16: + value = ((operand & 0x0800) >> 11) << 26; + value |= ((operand & 0xf000) >> 12) << 16; + value |= ((operand & 0x0700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtOff24: { + uint32_t signbit = (operand >> 31) & 0x1; + uint32_t i1 = (operand >> 22) & 0x1; + uint32_t i2 = (operand >> 21) & 0x1; + uint32_t imm10 = (operand >> 11) & 0x03ff; + uint32_t imm11 = operand & 0x07ff; + uint32_t j1 = (i1 ^ signbit) ? 0 : 1; + uint32_t j2 = (i2 ^ signbit) ? 0 : 1; + value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | + imm11; + bits |= value; + } + break; + default: + LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; + } + } + } + if (encoder->size == 4) { + write_pos[0] = ((bits >> 16) & 0xff); + write_pos[1] = ((bits >> 24) & 0xff); + write_pos[2] = (bits & 0xff); + write_pos[3] = ((bits >> 8) & 0xff); + write_pos += 4; + } else { + DCHECK_EQ(encoder->size, 2); + write_pos[0] = (bits & 0xff); + write_pos[1] = ((bits >> 8) & 0xff); + write_pos += 2; + } + } + } + } + return write_pos; +} + +// Assemble the LIR into binary instruction format. +void Arm64Mir2Lir::AssembleLIR() { + LIR* lir; + LIR* prev_lir; + cu_->NewTimingSplit("Assemble"); + int assembler_retries = 0; + CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0); + data_offset_ = (starting_offset + 0x3) & ~0x3; + int32_t offset_adjustment; + AssignDataOffsets(); + + /* + * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for + * non-visited nodes). Start at zero here, and bit will be flipped to 1 on entry to the loop. + */ + int generation = 0; + while (true) { + offset_adjustment = 0; + AssemblerStatus res = kSuccess; // Assume success + generation ^= 1; + // Note: nodes requring possible fixup linked in ascending order. + lir = first_fixup_; + prev_lir = NULL; + while (lir != NULL) { + /* + * NOTE: the lir being considered here will be encoded following the switch (so long as + * we're not in a retry situation). However, any new non-pc_rel instructions inserted + * due to retry must be explicitly encoded at the time of insertion. Note that + * inserted instructions don't need use/def flags, but do need size and pc-rel status + * properly updated. + */ + lir->offset += offset_adjustment; + // During pass, allows us to tell whether a node has been updated with offset_adjustment yet. + lir->flags.generation = generation; + switch (static_cast<FixupKind>(lir->flags.fixup)) { + case kFixupLabel: + case kFixupNone: + break; + case kFixupVLoad: + if (lir->operands[1] != rs_r15pc.GetReg()) { + break; + } + // NOTE: intentional fallthrough. + case kFixupLoad: { + /* + * PC-relative loads are mostly used to load immediates + * that are too large to materialize directly in one shot. + * However, if the load displacement exceeds the limit, + * we revert to a multiple-instruction materialization sequence. + */ + LIR *lir_target = lir->target; + CodeOffset pc = (lir->offset + 4) & ~3; + CodeOffset target = lir_target->offset + + ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; + if (res != kSuccess) { + /* + * In this case, we're just estimating and will do it again for real. Ensure offset + * is legal. 
+ */ + delta &= ~0x3; + } + DCHECK_EQ((delta & 0x3), 0); + // First, a sanity check for cases we shouldn't see now + if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) || + ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) { + // Shouldn't happen in current codegen. + LOG(FATAL) << "Unexpected pc-rel offset " << delta; + } + // Now, check for the difficult cases + if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || + ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) || + ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) || + ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) { + /* + * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we + * sometimes have to use it to fix up out-of-range accesses. This is where that + * happens. + */ + int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || + (lir->opcode == kThumb2LdrPcRel12)) ? lir->operands[0] : + rs_rARM_LR.GetReg(); + + // Add new Adr to generate the address. + LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr, + base_reg, 0, 0, 0, 0, lir->target); + new_adr->offset = lir->offset; + new_adr->flags.fixup = kFixupAdr; + new_adr->flags.size = EncodingMap[kThumb2Adr].size; + InsertLIRBefore(lir, new_adr); + lir->offset += new_adr->flags.size; + offset_adjustment += new_adr->flags.size; + + // lir no longer pcrel, unlink and link in new_adr. + ReplaceFixup(prev_lir, lir, new_adr); + + // Convert to normal load. + offset_adjustment -= lir->flags.size; + if (lir->opcode == kThumb2LdrPcRel12) { + lir->opcode = kThumb2LdrRRI12; + } else if (lir->opcode == kThumb2LdrdPcRel8) { + lir->opcode = kThumb2LdrdI8; + } + lir->flags.size = EncodingMap[lir->opcode].size; + offset_adjustment += lir->flags.size; + // Change the load to be relative to the new Adr base. + if (lir->opcode == kThumb2LdrdI8) { + lir->operands[3] = 0; + lir->operands[2] = base_reg; + } else { + lir->operands[2] = 0; + lir->operands[1] = base_reg; + } + prev_lir = new_adr; // Continue scan with new_adr; + lir = new_adr->u.a.pcrel_next; + res = kRetryAll; + continue; + } else { + if ((lir->opcode == kThumb2Vldrs) || + (lir->opcode == kThumb2Vldrd) || + (lir->opcode == kThumb2LdrdPcRel8)) { + lir->operands[2] = delta >> 2; + } else { + lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? delta : + delta >> 2; + } + } + break; + } + case kFixupCBxZ: { + LIR *target_lir = lir->target; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; + if (delta > 126 || delta < 0) { + /* + * Convert to cmp rx,#0 / b[eq/ne] tgt pair + * Make new branch instruction and insert after + */ + LIR* new_inst = + RawLIR(lir->dalvik_offset, kThumbBCond, 0, + (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe, + 0, 0, 0, lir->target); + InsertLIRAfter(lir, new_inst); + + /* Convert the cb[n]z to a cmp rx, #0 ] */ + // Subtract the old size. + offset_adjustment -= lir->flags.size; + lir->opcode = kThumbCmpRI8; + /* operand[0] is src1 in both cb[n]z & CmpRI8 */ + lir->operands[1] = 0; + lir->target = 0; + lir->flags.size = EncodingMap[lir->opcode].size; + // Add back the new size. + offset_adjustment += lir->flags.size; + // Set up the new following inst. 
+ new_inst->offset = lir->offset + lir->flags.size; + new_inst->flags.fixup = kFixupCondBranch; + new_inst->flags.size = EncodingMap[new_inst->opcode].size; + offset_adjustment += new_inst->flags.size; + + // lir no longer pcrel, unlink and link in new_inst. + ReplaceFixup(prev_lir, lir, new_inst); + prev_lir = new_inst; // Continue with the new instruction. + lir = new_inst->u.a.pcrel_next; + res = kRetryAll; + continue; + } else { + lir->operands[1] = delta >> 1; + } + break; + } + case kFixupPushPop: { + if (__builtin_popcount(lir->operands[0]) == 1) { + /* + * The standard push/pop multiple instruction + * requires at least two registers in the list. + * If we've got just one, switch to the single-reg + * encoding. + */ + lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 : + kThumb2Pop1; + int reg = 0; + while (lir->operands[0]) { + if (lir->operands[0] & 0x1) { + break; + } else { + reg++; + lir->operands[0] >>= 1; + } + } + lir->operands[0] = reg; + // This won't change again, don't bother unlinking, just reset fixup kind + lir->flags.fixup = kFixupNone; + } + break; + } + case kFixupCondBranch: { + LIR *target_lir = lir->target; + int32_t delta = 0; + DCHECK(target_lir); + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + delta = target - pc; + if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) { + offset_adjustment -= lir->flags.size; + lir->opcode = kThumb2BCond; + lir->flags.size = EncodingMap[lir->opcode].size; + // Fixup kind remains the same. + offset_adjustment += lir->flags.size; + res = kRetryAll; + } + lir->operands[0] = delta >> 1; + break; + } + case kFixupT2Branch: { + LIR *target_lir = lir->target; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; + lir->operands[0] = delta >> 1; + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) { + // Useless branch + offset_adjustment -= lir->flags.size; + lir->flags.is_nop = true; + // Don't unlink - just set to do-nothing. + lir->flags.fixup = kFixupNone; + res = kRetryAll; + } + break; + } + case kFixupT1Branch: { + LIR *target_lir = lir->target; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; + if (delta > 2046 || delta < -2048) { + // Convert to Thumb2BCond w/ kArmCondAl + offset_adjustment -= lir->flags.size; + lir->opcode = kThumb2BUncond; + lir->operands[0] = 0; + lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.fixup = kFixupT2Branch; + offset_adjustment += lir->flags.size; + res = kRetryAll; + } else { + lir->operands[0] = delta >> 1; + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) { + // Useless branch + offset_adjustment -= lir->flags.size; + lir->flags.is_nop = true; + // Don't unlink - just set to do-nothing. 
+ lir->flags.fixup = kFixupNone; + res = kRetryAll; + } + } + break; + } + case kFixupBlx1: { + DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2); + /* cur_pc is Thumb */ + CodeOffset cur_pc = (lir->offset + 4) & ~3; + CodeOffset target = lir->operands[1]; + + /* Match bit[1] in target with base */ + if (cur_pc & 0x2) { + target |= 0x2; + } + int32_t delta = target - cur_pc; + DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); + + lir->operands[0] = (delta >> 12) & 0x7ff; + NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + break; + } + case kFixupBl1: { + DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2); + /* Both cur_pc and target are Thumb */ + CodeOffset cur_pc = lir->offset + 4; + CodeOffset target = lir->operands[1]; + + int32_t delta = target - cur_pc; + DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); + + lir->operands[0] = (delta >> 12) & 0x7ff; + NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + break; + } + case kFixupAdr: { + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2])); + LIR* target = lir->target; + int32_t target_disp = (tab_rec != NULL) ? tab_rec->offset + offset_adjustment + : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 : + offset_adjustment); + int32_t disp = target_disp - ((lir->offset + 4) & ~3); + if (disp < 4096) { + lir->operands[1] = disp; + } else { + // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0] + // TUNING: if this case fires often, it can be improved. Not expected to be common. + LIR *new_mov16L = + RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, lir->operands[0], 0, + WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); + new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size; + new_mov16L->flags.fixup = kFixupMovImmLST; + new_mov16L->offset = lir->offset; + // Link the new instruction, retaining lir. + InsertLIRBefore(lir, new_mov16L); + lir->offset += new_mov16L->flags.size; + offset_adjustment += new_mov16L->flags.size; + InsertFixupBefore(prev_lir, lir, new_mov16L); + prev_lir = new_mov16L; // Now we've got a new prev. + LIR *new_mov16H = + RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, lir->operands[0], 0, + WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); + new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size; + new_mov16H->flags.fixup = kFixupMovImmHST; + new_mov16H->offset = lir->offset; + // Link the new instruction, retaining lir. + InsertLIRBefore(lir, new_mov16H); + lir->offset += new_mov16H->flags.size; + offset_adjustment += new_mov16H->flags.size; + InsertFixupBefore(prev_lir, lir, new_mov16H); + prev_lir = new_mov16H; // Now we've got a new prev. + + offset_adjustment -= lir->flags.size; + if (RegStorage::RegNum(lir->operands[0]) < 8) { + lir->opcode = kThumbAddRRLH; + } else { + lir->opcode = kThumbAddRRHH; + } + lir->operands[1] = rs_rARM_PC.GetReg(); + lir->flags.size = EncodingMap[lir->opcode].size; + offset_adjustment += lir->flags.size; + // Must stay in fixup list and have offset updated; will be used by LST/HSP pair. + lir->flags.fixup = kFixupNone; + res = kRetryAll; + } + break; + } + case kFixupMovImmLST: { + // operands[1] should hold disp, [2] has add, [3] has tab_rec + LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])); + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); + // If tab_rec is null, this is a literal load. Use target + LIR* target = lir->target; + int32_t target_disp = tab_rec ? 
tab_rec->offset : target->offset; + lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff; + break; + } + case kFixupMovImmHST: { + // operands[1] should hold disp, [2] has add, [3] has tab_rec + LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])); + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); + // If tab_rec is null, this is a literal load. Use target + LIR* target = lir->target; + int32_t target_disp = tab_rec ? tab_rec->offset : target->offset; + lir->operands[1] = + ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff; + break; + } + case kFixupAlign4: { + int32_t required_size = lir->offset & 0x2; + if (lir->flags.size != required_size) { + offset_adjustment += required_size - lir->flags.size; + lir->flags.size = required_size; + res = kRetryAll; + } + break; + } + default: + LOG(FATAL) << "Unexpected case " << lir->flags.fixup; + } + prev_lir = lir; + lir = lir->u.a.pcrel_next; + } + + if (res == kSuccess) { + break; + } else { + assembler_retries++; + if (assembler_retries > MAX_ASSEMBLER_RETRIES) { + CodegenDump(); + LOG(FATAL) << "Assembler error - too many retries"; + } + starting_offset += offset_adjustment; + data_offset_ = (starting_offset + 0x3) & ~0x3; + AssignDataOffsets(); + } + } + + // Build the CodeBuffer. + DCHECK_LE(data_offset_, total_size_); + code_buffer_.reserve(total_size_); + code_buffer_.resize(starting_offset); + uint8_t* write_pos = &code_buffer_[0]; + write_pos = EncodeLIRs(write_pos, first_lir_insn_); + DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset); + + DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3); + + // Install literals + InstallLiteralPools(); + + // Install switch tables + InstallSwitchTables(); + + // Install fill array data + InstallFillArrayData(); + + // Create the mapping table and native offset to reference map. + cu_->NewTimingSplit("PcMappingTable"); + CreateMappingTables(); + + cu_->NewTimingSplit("GcMap"); + CreateNativeGcMap(); +} + +int Arm64Mir2Lir::GetInsnSize(LIR* lir) { + DCHECK(!IsPseudoLirOp(lir->opcode)); + return EncodingMap[lir->opcode].size; +} + +// Encode instruction bit pattern and assign offsets. +uint32_t Arm64Mir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t offset) { + LIR* end_lir = tail_lir->next; + + LIR* last_fixup = NULL; + for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) { + if (!lir->flags.is_nop) { + if (lir->flags.fixup != kFixupNone) { + if (!IsPseudoLirOp(lir->opcode)) { + lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.fixup = EncodingMap[lir->opcode].fixup; + } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) { + lir->flags.size = (offset & 0x2); + lir->flags.fixup = kFixupAlign4; + } else { + lir->flags.size = 0; + lir->flags.fixup = kFixupLabel; + } + // Link into the fixup chain. 
+ lir->flags.use_def_invalid = true; + lir->u.a.pcrel_next = NULL; + if (first_fixup_ == NULL) { + first_fixup_ = lir; + } else { + last_fixup->u.a.pcrel_next = lir; + } + last_fixup = lir; + lir->offset = offset; + } + offset += lir->flags.size; + } + } + return offset; +} + +void Arm64Mir2Lir::AssignDataOffsets() { + /* Set up offsets for literals */ + CodeOffset offset = data_offset_; + + offset = AssignLiteralOffset(offset); + + offset = AssignSwitchTablesOffset(offset); + + total_size_ = AssignFillArrayDataOffset(offset); +} + +} // namespace art diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc new file mode 100644 index 000000000..9dfee6ef2 --- /dev/null +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -0,0 +1,477 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains codegen for the Thumb2 ISA. */ + +#include "arm64_lir.h" +#include "codegen_arm64.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "entrypoints/quick/quick_entrypoints.h" + +namespace art { + +/* + * The sparse table in the literal pool is an array of <key,displacement> + * pairs. For each set, we'll load them as a pair using ldmia. + * This means that the register number of the temp we use for the key + * must be lower than the reg for the displacement. 
+ * + * The test loop will look something like: + * + * adr r_base, <table> + * ldr r_val, [rARM_SP, v_reg_off] + * mov r_idx, #table_size + * lp: + * ldmia r_base!, {r_key, r_disp} + * sub r_idx, #1 + * cmp r_val, r_key + * ifeq + * add rARM_PC, r_disp ; This is the branch from which we compute displacement + * cbnz r_idx, lp + */ +void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) { + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpSparseSwitchTable(table); + } + // Add the table to the list - we'll process it later + SwitchTable *tab_rec = + static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + uint32_t size = table[1]; + tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); + switch_tables_.Insert(tab_rec); + + // Get the switch value + rl_src = LoadValue(rl_src, kCoreReg); + RegStorage r_base = AllocTemp(); + /* Allocate key and disp temps */ + RegStorage r_key = AllocTemp(); + RegStorage r_disp = AllocTemp(); + // Make sure r_key's register number is less than r_disp's number for ldmia + if (r_key.GetReg() > r_disp.GetReg()) { + RegStorage tmp = r_disp; + r_disp = r_key; + r_key = tmp; + } + // Materialize a pointer to the switch table + NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec)); + // Set up r_idx + RegStorage r_idx = AllocTemp(); + LoadConstant(r_idx, size); + // Establish loop branch target + LIR* target = NewLIR0(kPseudoTargetLabel); + // Load next key/disp + NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetRegNum()) | (1 << r_disp.GetRegNum())); + OpRegReg(kOpCmp, r_key, rl_src.reg); + // Go if match. NOTE: No instruction set switch here - must stay Thumb2 + LIR* it = OpIT(kCondEq, ""); + LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg()); + OpEndIT(it); + tab_rec->anchor = switch_branch; + // Needs to use setflags encoding here + OpRegRegImm(kOpSub, r_idx, r_idx, 1); // For value == 1, this should set flags. 
+ DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + OpCondBranch(kCondNe, target); +} + + +void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) { + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpPackedSwitchTable(table); + } + // Add the table to the list - we'll process it later + SwitchTable *tab_rec = + static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + uint32_t size = table[1]; + tab_rec->targets = + static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); + switch_tables_.Insert(tab_rec); + + // Get the switch value + rl_src = LoadValue(rl_src, kCoreReg); + RegStorage table_base = AllocTemp(); + // Materialize a pointer to the switch table + NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec)); + int low_key = s4FromSwitchData(&table[2]); + RegStorage keyReg; + // Remove the bias, if necessary + if (low_key == 0) { + keyReg = rl_src.reg; + } else { + keyReg = AllocTemp(); + OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key); + } + // Bounds check - if < 0 or >= size continue following switch + OpRegImm(kOpCmp, keyReg, size-1); + LIR* branch_over = OpCondBranch(kCondHi, NULL); + + // Load the displacement from the switch table + RegStorage disp_reg = AllocTemp(); + LoadBaseIndexed(table_base, keyReg, disp_reg, 2, k32); + + // ..and go! NOTE: No instruction set switch here - must stay Thumb2 + LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg()); + tab_rec->anchor = switch_branch; + + /* branch_over target here */ + LIR* target = NewLIR0(kPseudoTargetLabel); + branch_over->target = target; +} + +/* + * Array data table format: + * ushort ident = 0x0300 magic value + * ushort width width of each element in the table + * uint size number of elements in the table + * ubyte data[size*width] table of data values (may contain a single-byte + * padding at the end) + * + * Total size is 4+(width * size + 1)/2 16-bit code units. + */ +void Arm64Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + // Add the table to the list - we'll process it later + FillArrayData *tab_rec = + static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + uint16_t width = tab_rec->table[1]; + uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16); + tab_rec->size = (size * width) + 8; + + fill_array_data_.Insert(tab_rec); + + // Making a call - use explicit registers + FlushAllRegs(); /* Everything to home location */ + LoadValueDirectFixed(rl_src, rs_r0); + LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(), + rs_rARM_LR); + // Materialize a pointer to the fill data image + NewLIR3(kThumb2Adr, rs_r1.GetReg(), 0, WrapPointer(tab_rec)); + ClobberCallerSave(); + LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR); + MarkSafepointPC(call_inst); +} + +/* + * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more + * details see monitor.cc. + */ +void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { + FlushAllRegs(); + // FIXME: need separate LoadValues for object references. 
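+  // Until then, the object reference is loaded into r0 like any other 32-bit value.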
+ LoadValueDirectFixed(rl_src, rs_r0); // Get obj + LockCallTemps(); // Prepare for explicit register usage + constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. + if (kArchVariantHasGoodBranchPredictor) { + LIR* null_check_branch = nullptr; + if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { + null_check_branch = nullptr; // No null check. + } else { + // If the null-check fails its handled by the slow-path to reduce exception related meta-data. + if (Runtime::Current()->ExplicitNullChecks()) { + null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL); + } + } + Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2); + NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(), + mirror::Object::MonitorOffset().Int32Value() >> 2); + MarkPossibleNullPointerException(opt_flags); + LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL); + NewLIR4(kThumb2Strex, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(), + mirror::Object::MonitorOffset().Int32Value() >> 2); + LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL); + + + LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); + not_unlocked_branch->target = slow_path_target; + if (null_check_branch != nullptr) { + null_check_branch->target = slow_path_target; + } + // TODO: move to a slow path. + // Go expensive route - artLockObjectFromCode(obj); + LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR); + ClobberCallerSave(); + LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR); + MarkSafepointPC(call_inst); + + LIR* success_target = NewLIR0(kPseudoTargetLabel); + lock_success_branch->target = success_target; + GenMemBarrier(kLoadLoad); + } else { + // Explicit null-check as slow-path is entered using an IT. + GenNullCheck(rs_r0, opt_flags); + Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2); + NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(), + mirror::Object::MonitorOffset().Int32Value() >> 2); + MarkPossibleNullPointerException(opt_flags); + OpRegImm(kOpCmp, rs_r1, 0); + LIR* it = OpIT(kCondEq, ""); + NewLIR4(kThumb2Strex/*eq*/, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(), + mirror::Object::MonitorOffset().Int32Value() >> 2); + OpEndIT(it); + OpRegImm(kOpCmp, rs_r1, 0); + it = OpIT(kCondNe, "T"); + // Go expensive route - artLockObjectFromCode(self, obj); + LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), + rs_rARM_LR); + ClobberCallerSave(); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR); + OpEndIT(it); + MarkSafepointPC(call_inst); + GenMemBarrier(kLoadLoad); + } +} + +/* + * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more + * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock + * and can only give away ownership if its suspended. + */ +void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { + FlushAllRegs(); + LoadValueDirectFixed(rl_src, rs_r0); // Get obj + LockCallTemps(); // Prepare for explicit register usage + LIR* null_check_branch = nullptr; + Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2); + constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. + if (kArchVariantHasGoodBranchPredictor) { + if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { + null_check_branch = nullptr; // No null check. 
+ } else { + // If the null-check fails its handled by the slow-path to reduce exception related meta-data. + if (Runtime::Current()->ExplicitNullChecks()) { + null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL); + } + } + Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); + MarkPossibleNullPointerException(opt_flags); + LoadConstantNoClobber(rs_r3, 0); + LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL); + Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3); + LIR* unlock_success_branch = OpUnconditionalBranch(NULL); + + LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); + slow_unlock_branch->target = slow_path_target; + if (null_check_branch != nullptr) { + null_check_branch->target = slow_path_target; + } + // TODO: move to a slow path. + // Go expensive route - artUnlockObjectFromCode(obj); + LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR); + ClobberCallerSave(); + LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR); + MarkSafepointPC(call_inst); + + LIR* success_target = NewLIR0(kPseudoTargetLabel); + unlock_success_branch->target = success_target; + GenMemBarrier(kStoreLoad); + } else { + // Explicit null-check as slow-path is entered using an IT. + GenNullCheck(rs_r0, opt_flags); + Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); // Get lock + MarkPossibleNullPointerException(opt_flags); + Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2); + LoadConstantNoClobber(rs_r3, 0); + // Is lock unheld on lock or held by us (==thread_id) on unlock? + OpRegReg(kOpCmp, rs_r1, rs_r2); + LIR* it = OpIT(kCondEq, "EE"); + Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3); + // Go expensive route - UnlockObjectFromCode(obj); + LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), + rs_rARM_LR); + ClobberCallerSave(); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR); + OpEndIT(it); + MarkSafepointPC(call_inst); + GenMemBarrier(kStoreLoad); + } +} + +void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) { + int ex_offset = Thread::ExceptionOffset<4>().Int32Value(); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage reset_reg = AllocTemp(); + Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg); + LoadConstant(reset_reg, 0); + Store32Disp(rs_rARM_SELF, ex_offset, reset_reg); + FreeTemp(reset_reg); + StoreValue(rl_dest, rl_result); +} + +/* + * Mark garbage collection card. Skip if the value we're storing is null. + */ +void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) { + RegStorage reg_card_base = AllocTemp(); + RegStorage reg_card_no = AllocTemp(); + LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL); + LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base); + OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift); + StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte); + LIR* target = NewLIR0(kPseudoTargetLabel); + branch_over->target = target; + FreeTemp(reg_card_base); + FreeTemp(reg_card_no); +} + +void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + int spill_count = num_core_spills_ + num_fp_spills_; + /* + * On entry, r0, r1, r2 & r3 are live. Let the register allocation + * mechanism know so it doesn't try to use any of them when + * expanding the frame or flushing. 
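// The write barrier emitted by MarkGCCard() above, as an illustrative C++ sketch:
// skip null stores, then dirty the card covering the destination address. The
// "dirty value == low byte of the card table base" detail mirrors the
// StoreBaseIndexed() call; the names and the card_shift parameter are assumptions.
#include <cstdint>

void MarkCard(uint8_t* card_table_base, uintptr_t target_addr, const void* stored_ref,
              unsigned card_shift /* gc::accounting::CardTable::kCardShift */) {
  if (stored_ref == nullptr) {
    return;  // OpCmpImmBranch(kCondEq, val_reg, 0, ...) branches over the store.
  }
  // card_table_base[target_addr >> card_shift] = low byte of card_table_base;
  card_table_base[target_addr >> card_shift] =
      static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_table_base));
}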
This leaves the utility + * code with a single temp: r12. This should be enough. + */ + LockTemp(rs_r0); + LockTemp(rs_r1); + LockTemp(rs_r2); + LockTemp(rs_r3); + + /* + * We can safely skip the stack overflow check if we're + * a leaf *and* our frame size < fudge factor. + */ + bool skip_overflow_check = (mir_graph_->MethodIsLeaf() && + (static_cast<size_t>(frame_size_) < + Thread::kStackOverflowReservedBytes)); + NewLIR0(kPseudoMethodEntry); + if (!skip_overflow_check) { + if (Runtime::Current()->ExplicitStackOverflowChecks()) { + /* Load stack limit */ + Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12); + } + } + /* Spill core callee saves */ + NewLIR1(kThumb2Push, core_spill_mask_); + /* Need to spill any FP regs? */ + if (num_fp_spills_) { + /* + * NOTE: fp spills are a little different from core spills in that + * they are pushed as a contiguous block. When promoting from + * the fp set, we must allocate all singles from s16..highest-promoted + */ + NewLIR1(kThumb2VPushCS, num_fp_spills_); + } + + const int spill_size = spill_count * 4; + const int frame_size_without_spills = frame_size_ - spill_size; + if (!skip_overflow_check) { + if (Runtime::Current()->ExplicitStackOverflowChecks()) { + class StackOverflowSlowPath : public LIRSlowPath { + public: + StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace) + : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr), + sp_displace_(sp_displace) { + } + void Compile() OVERRIDE { + m2l_->ResetRegPool(); + m2l_->ResetDefTracking(); + GenerateTargetLabel(kPseudoThrowTarget); + if (restore_lr_) { + m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR); + } + m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_); + m2l_->ClobberCallerSave(); + ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow); + // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes + // codegen and target are in thumb2 mode. + // NOTE: native pointer. + m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC); + } + + private: + const bool restore_lr_; + const size_t sp_displace_; + }; + if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) { + OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills); + LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr); + // Need to restore LR since we used it as a temp. + AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size)); + OpRegCopy(rs_rARM_SP, rs_rARM_LR); // Establish stack + } else { + // If the frame is small enough we are guaranteed to have enough space that remains to + // handle signals on the user stack. + OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills); + LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr); + AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_)); + } + } else { + // Implicit stack overflow check. + // Generate a load from [sp, #-overflowsize]. If this is in the stack + // redzone we will get a segmentation fault. + // + // Caveat coder: if someone changes the kStackOverflowReservedBytes value + // we need to make sure that it's loadable in an immediate field of + // a sub instruction. Otherwise we will get a temp allocation and the + // code size will increase. 
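// The explicit stack-overflow check above, reduced to its comparison: the frame is
// rejected when the would-be stack pointer drops below Thread::StackEnd() (loaded
// into r12 earlier). A rough sketch under assumed names; kCondUlt corresponds to an
// unsigned pointer comparison.
#include <cstddef>
#include <cstdint>

bool FrameFits(uintptr_t sp, uintptr_t stack_end, size_t frame_size_without_spills) {
  // Large frames compute the candidate sp into LR first so that sp itself never
  // moves below the guard region before the check has passed.
  const uintptr_t new_sp = sp - frame_size_without_spills;
  return new_sp >= stack_end;  // false -> StackOverflowSlowPath / pThrowStackOverflow
}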
+ OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes); + Load32Disp(rs_r12, 0, rs_r12); + MarkPossibleStackOverflowException(); + OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + } + } else { + OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + } + + FlushIns(ArgLocs, rl_method); + + FreeTemp(rs_r0); + FreeTemp(rs_r1); + FreeTemp(rs_r2); + FreeTemp(rs_r3); +} + +void Arm64Mir2Lir::GenExitSequence() { + int spill_count = num_core_spills_ + num_fp_spills_; + /* + * In the exit path, r0/r1 are live - make sure they aren't + * allocated by the register utilities as temps. + */ + LockTemp(rs_r0); + LockTemp(rs_r1); + + NewLIR0(kPseudoMethodExit); + OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4)); + /* Need to restore any FP callee saves? */ + if (num_fp_spills_) { + NewLIR1(kThumb2VPopCS, num_fp_spills_); + } + if (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) { + /* Unspill rARM_LR to rARM_PC */ + core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum()); + core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum()); + } + NewLIR1(kThumb2Pop, core_spill_mask_); + if (!(core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum()))) { + /* We didn't pop to rARM_PC, so must do a bv rARM_LR */ + NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); + } +} + +void Arm64Mir2Lir::GenSpecialExitSequence() { + NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); +} + +} // namespace art diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h new file mode 100644 index 000000000..94c2563ae --- /dev/null +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_ +#define ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_ + +#include "arm64_lir.h" +#include "dex/compiler_internals.h" + +namespace art { + +class Arm64Mir2Lir FINAL : public Mir2Lir { + public: + Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); + + // Required for target - codegen helpers. 
+ bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src, + RegLocation rl_dest, int lit); + bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE; + LIR* CheckSuspendUsingLoad() OVERRIDE; + RegStorage LoadHelper(ThreadOffset<4> offset); + LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size, + int s_reg); + LIR* LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest, int s_reg); + LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, + OpSize size); + LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, + RegStorage r_dest, OpSize size, int s_reg); + LIR* LoadConstantNoClobber(RegStorage r_dest, int value); + LIR* LoadConstantWide(RegStorage r_dest, int64_t value); + LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size); + LIR* StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src); + LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, + OpSize size); + LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, + RegStorage r_src, OpSize size, int s_reg); + void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg); + + // Required for target - register utilities. + RegStorage AllocTypedTemp(bool fp_hint, int reg_class); + RegStorage AllocTypedTempWide(bool fp_hint, int reg_class); + RegStorage TargetReg(SpecialTargetRegister reg); + RegStorage GetArgMappingToPhysicalReg(int arg_num); + RegLocation GetReturnAlt(); + RegLocation GetReturnWideAlt(); + RegLocation LocCReturn(); + RegLocation LocCReturnDouble(); + RegLocation LocCReturnFloat(); + RegLocation LocCReturnWide(); + uint64_t GetRegMaskCommon(RegStorage reg); + void AdjustSpillMask(); + void ClobberCallerSave(); + void FreeCallTemps(); + void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free); + void LockCallTemps(); + void MarkPreservedSingle(int v_reg, RegStorage reg); + void MarkPreservedDouble(int v_reg, RegStorage reg); + void CompilerInitializeRegAlloc(); + RegStorage AllocPreservedDouble(int s_reg); + + // Required for target - miscellaneous. + void AssembleLIR(); + uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset); + int AssignInsnOffsets(); + void AssignOffsets(); + static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir); + void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); + void SetupTargetResourceMasks(LIR* lir, uint64_t flags); + const char* GetTargetInstFmt(int opcode); + const char* GetTargetInstName(int opcode); + std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); + uint64_t GetPCUseDefEncoding(); + uint64_t GetTargetInstFlags(int opcode); + int GetInsnSize(LIR* lir); + bool IsUnconditionalBranch(LIR* lir); + + // Required for target - Dalvik-level generators. 
+ void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_dest, int scale); + void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src, int scale, bool card_mark); + void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift); + void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); + bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object); + bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); + bool GenInlinedSqrt(CallInfo* info); + bool GenInlinedPeek(CallInfo* info, OpSize size); + bool GenInlinedPoke(CallInfo* info, OpSize size); + void GenNegLong(RegLocation rl_dest, RegLocation rl_src); + void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); + RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); + void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenDivZeroCheckWide(RegStorage reg); + void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); + void GenExitSequence(); + void GenSpecialExitSequence(); + void GenFillArrayData(DexOffset table_offset, RegLocation rl_src); + void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); + void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); + void GenSelect(BasicBlock* bb, MIR* mir); + void GenMemBarrier(MemBarrierKind barrier_kind); + void GenMonitorEnter(int opt_flags, RegLocation rl_src); + void GenMonitorExit(int opt_flags, RegLocation rl_src); + void GenMoveException(RegLocation rl_dest); + void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, + int first_bit, int second_bit); + void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); + void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); + void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + + // Required for target - single operation generators. 
+ LIR* OpUnconditionalBranch(LIR* target); + LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target); + LIR* OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target); + LIR* OpCondBranch(ConditionCode cc, LIR* target); + LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target); + LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src); + LIR* OpIT(ConditionCode cond, const char* guide); + void OpEndIT(LIR* it); + LIR* OpMem(OpKind op, RegStorage r_base, int disp); + LIR* OpPcRelLoad(RegStorage reg, LIR* target); + LIR* OpReg(OpKind op, RegStorage r_dest_src); + void OpRegCopy(RegStorage r_dest, RegStorage r_src); + LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src); + LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value); + LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset); + LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2); + LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type); + LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type); + LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src); + LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value); + LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2); + LIR* OpTestSuspend(LIR* target); + LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset); + LIR* OpVldm(RegStorage r_base, int count); + LIR* OpVstm(RegStorage r_base, int count); + void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset); + void OpRegCopyWide(RegStorage dest, RegStorage src); + void OpTlsCmp(ThreadOffset<4> offset, int val); + + LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size, + int s_reg); + LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size); + LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2, + int shift); + LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift); + static const ArmEncodingMap EncodingMap[kArmLast]; + int EncodeShift(int code, int amount); + int ModifiedImmediate(uint32_t value); + ArmConditionCode ArmConditionEncoding(ConditionCode code); + bool InexpensiveConstantInt(int32_t value); + bool InexpensiveConstantFloat(int32_t value); + bool InexpensiveConstantLong(int64_t value); + bool InexpensiveConstantDouble(int64_t value); + + private: + void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, + ConditionCode ccode); + LIR* LoadFPConstantValue(int r_dest, int value); + void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); + void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); + void AssignDataOffsets(); + RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + bool is_div, bool check_zero); + RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div); + typedef struct { + OpKind op; + uint32_t shift; + } EasyMultiplyOp; + bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op); + bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops); + void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops); +}; + +} // namespace art + +#endif // ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_ diff --git a/compiler/dex/quick/arm64/create.sh b/compiler/dex/quick/arm64/create.sh new file 
mode 100644 index 000000000..a3833bdc3 --- /dev/null +++ b/compiler/dex/quick/arm64/create.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -e + +if [ ! -d ./arm ]; then + echo "Directory ./arm not found." + exit 1 +fi + +mkdir -p arm64 +dst=`cd arm64 && pwd` +cd arm/ +for f in *; do + cp $f $dst/`echo $f | sed 's/arm/arm64/g'` +done + +sed -i 's,ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_,ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_,g' $dst/arm64_lir.h +sed -i 's,ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_,ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_,g' $dst/codegen_arm64.h +sed -i -e 's,ArmMir2Lir,Arm64Mir2Lir,g' -e 's,arm_lir.h,arm64_lir.h,g' -e 's,codegen_arm.h,codegen_arm64.h,g' $dst/*.h $dst/*.cc diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc new file mode 100644 index 000000000..96842836a --- /dev/null +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -0,0 +1,358 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm64_lir.h" +#include "codegen_arm64.h" +#include "dex/quick/mir_to_lir-inl.h" + +namespace art { + +void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) { + int op = kThumbBkpt; + RegLocation rl_result; + + /* + * Don't attempt to optimize register usage since these opcodes call out to + * the handlers. 
+ */ + switch (opcode) { + case Instruction::ADD_FLOAT_2ADDR: + case Instruction::ADD_FLOAT: + op = kThumb2Vadds; + break; + case Instruction::SUB_FLOAT_2ADDR: + case Instruction::SUB_FLOAT: + op = kThumb2Vsubs; + break; + case Instruction::DIV_FLOAT_2ADDR: + case Instruction::DIV_FLOAT: + op = kThumb2Vdivs; + break; + case Instruction::MUL_FLOAT_2ADDR: + case Instruction::MUL_FLOAT: + op = kThumb2Vmuls; + break; + case Instruction::REM_FLOAT_2ADDR: + case Instruction::REM_FLOAT: + FlushAllRegs(); // Send everything to home location + CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, + false); + rl_result = GetReturn(true); + StoreValue(rl_dest, rl_result); + return; + case Instruction::NEG_FLOAT: + GenNegFloat(rl_dest, rl_src1); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + rl_src1 = LoadValue(rl_src1, kFPReg); + rl_src2 = LoadValue(rl_src2, kFPReg); + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + StoreValue(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + int op = kThumbBkpt; + RegLocation rl_result; + + switch (opcode) { + case Instruction::ADD_DOUBLE_2ADDR: + case Instruction::ADD_DOUBLE: + op = kThumb2Vaddd; + break; + case Instruction::SUB_DOUBLE_2ADDR: + case Instruction::SUB_DOUBLE: + op = kThumb2Vsubd; + break; + case Instruction::DIV_DOUBLE_2ADDR: + case Instruction::DIV_DOUBLE: + op = kThumb2Vdivd; + break; + case Instruction::MUL_DOUBLE_2ADDR: + case Instruction::MUL_DOUBLE: + op = kThumb2Vmuld; + break; + case Instruction::REM_DOUBLE_2ADDR: + case Instruction::REM_DOUBLE: + FlushAllRegs(); // Send everything to home location + CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, + false); + rl_result = GetReturnWide(true); + StoreValueWide(rl_dest, rl_result); + return; + case Instruction::NEG_DOUBLE: + GenNegDouble(rl_dest, rl_src1); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + + rl_src1 = LoadValueWide(rl_src1, kFPReg); + DCHECK(rl_src1.wide); + rl_src2 = LoadValueWide(rl_src2, kFPReg); + DCHECK(rl_src2.wide); + rl_result = EvalLoc(rl_dest, kFPReg, true); + DCHECK(rl_dest.wide); + DCHECK(rl_result.wide); + NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + StoreValueWide(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) { + int op = kThumbBkpt; + int src_reg; + RegLocation rl_result; + + switch (opcode) { + case Instruction::INT_TO_FLOAT: + op = kThumb2VcvtIF; + break; + case Instruction::FLOAT_TO_INT: + op = kThumb2VcvtFI; + break; + case Instruction::DOUBLE_TO_FLOAT: + op = kThumb2VcvtDF; + break; + case Instruction::FLOAT_TO_DOUBLE: + op = kThumb2VcvtFd; + break; + case Instruction::INT_TO_DOUBLE: + op = kThumb2VcvtF64S32; + break; + case Instruction::DOUBLE_TO_INT: + op = kThumb2VcvtDI; + break; + case Instruction::LONG_TO_DOUBLE: { + rl_src = LoadValueWide(rl_src, kFPReg); + RegStorage src_low = rl_src.reg.DoubleToLowSingle(); + RegStorage src_high = rl_src.reg.DoubleToHighSingle(); + rl_result = EvalLoc(rl_dest, kFPReg, true); + RegStorage tmp1 = AllocTempDouble(); + RegStorage tmp2 = AllocTempDouble(); + + NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg()); + NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), 
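// The LONG_TO_DOUBLE/LONG_TO_FLOAT lowering here splits the 64-bit value into
// halves, converts the high half signed and the low half unsigned, and recombines
// them with a multiply-add by 2^32; 0x41f0000000000000 is the IEEE-754 bit pattern
// of 4294967296.0. A sketch of the same arithmetic in portable C++:
#include <cstdint>
#include <cstring>

double LongToDouble(int64_t v) {
  int32_t hi = static_cast<int32_t>(v >> 32);   // kThumb2VcvtF64S32 (signed convert)
  uint32_t lo = static_cast<uint32_t>(v);       // kThumb2VcvtF64U32 (unsigned convert)
  uint64_t bits = UINT64_C(0x41f0000000000000);
  double two_pow_32;
  std::memcpy(&two_pow_32, &bits, sizeof(two_pow_32));  // == 4294967296.0
  // kThumb2VmlaF64: result = (double)lo + (double)hi * 2^32.
  return static_cast<double>(lo) + static_cast<double>(hi) * two_pow_32;
}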
src_low.GetReg()); + LoadConstantWide(tmp2, 0x41f0000000000000LL); + NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg()); + FreeTemp(tmp1); + FreeTemp(tmp2); + StoreValueWide(rl_dest, rl_result); + return; + } + case Instruction::FLOAT_TO_LONG: + GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src); + return; + case Instruction::LONG_TO_FLOAT: { + rl_src = LoadValueWide(rl_src, kFPReg); + RegStorage src_low = rl_src.reg.DoubleToLowSingle(); + RegStorage src_high = rl_src.reg.DoubleToHighSingle(); + rl_result = EvalLoc(rl_dest, kFPReg, true); + // Allocate temp registers. + RegStorage high_val = AllocTempDouble(); + RegStorage low_val = AllocTempDouble(); + RegStorage const_val = AllocTempDouble(); + // Long to double. + NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg()); + NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg()); + LoadConstantWide(const_val, INT64_C(0x41f0000000000000)); + NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg()); + // Double to float. + NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg()); + // Free temp registers. + FreeTemp(high_val); + FreeTemp(low_val); + FreeTemp(const_val); + // Store result. + StoreValue(rl_dest, rl_result); + return; + } + case Instruction::DOUBLE_TO_LONG: + GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + if (rl_src.wide) { + rl_src = LoadValueWide(rl_src, kFPReg); + src_reg = rl_src.reg.GetReg(); + } else { + rl_src = LoadValue(rl_src, kFPReg); + src_reg = rl_src.reg.GetReg(); + } + if (rl_dest.wide) { + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(op, rl_result.reg.GetReg(), src_reg); + StoreValueWide(rl_dest, rl_result); + } else { + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(op, rl_result.reg.GetReg(), src_reg); + StoreValue(rl_dest, rl_result); + } +} + +void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, + bool is_double) { + LIR* target = &block_label_list_[bb->taken]; + RegLocation rl_src1; + RegLocation rl_src2; + if (is_double) { + rl_src1 = mir_graph_->GetSrcWide(mir, 0); + rl_src2 = mir_graph_->GetSrcWide(mir, 2); + rl_src1 = LoadValueWide(rl_src1, kFPReg); + rl_src2 = LoadValueWide(rl_src2, kFPReg); + NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + } else { + rl_src1 = mir_graph_->GetSrc(mir, 0); + rl_src2 = mir_graph_->GetSrc(mir, 1); + rl_src1 = LoadValue(rl_src1, kFPReg); + rl_src2 = LoadValue(rl_src2, kFPReg); + NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + } + NewLIR0(kThumb2Fmstat); + ConditionCode ccode = mir->meta.ccode; + switch (ccode) { + case kCondEq: + case kCondNe: + break; + case kCondLt: + if (gt_bias) { + ccode = kCondMi; + } + break; + case kCondLe: + if (gt_bias) { + ccode = kCondLs; + } + break; + case kCondGt: + if (gt_bias) { + ccode = kCondHi; + } + break; + case kCondGe: + if (gt_bias) { + ccode = kCondUge; + } + break; + default: + LOG(FATAL) << "Unexpected ccode: " << ccode; + } + OpCondBranch(ccode, target); +} + + +void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) { + bool is_double = false; + int default_result = -1; + RegLocation rl_result; + + switch (opcode) { + case Instruction::CMPL_FLOAT: + is_double = false; + default_result = -1; + break; + case Instruction::CMPG_FLOAT: + is_double = false; + default_result = 1; + break; + case 
Instruction::CMPL_DOUBLE: + is_double = true; + default_result = -1; + break; + case Instruction::CMPG_DOUBLE: + is_double = true; + default_result = 1; + break; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + if (is_double) { + rl_src1 = LoadValueWide(rl_src1, kFPReg); + rl_src2 = LoadValueWide(rl_src2, kFPReg); + // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + LoadConstant(rl_result.reg, default_result); + NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + } else { + rl_src1 = LoadValue(rl_src1, kFPReg); + rl_src2 = LoadValue(rl_src2, kFPReg); + // In case result vreg is also a srcvreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + LoadConstant(rl_result.reg, default_result); + NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); + } + DCHECK(!rl_result.reg.IsFloat()); + NewLIR0(kThumb2Fmstat); + + LIR* it = OpIT((default_result == -1) ? kCondGt : kCondMi, ""); + NewLIR2(kThumb2MovI8M, rl_result.reg.GetReg(), + ModifiedImmediate(-default_result)); // Must not alter ccodes + OpEndIT(it); + + it = OpIT(kCondEq, ""); + LoadConstant(rl_result.reg, 0); + OpEndIT(it); + + StoreValue(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) { + RegLocation rl_result; + rl_src = LoadValue(rl_src, kFPReg); + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(kThumb2Vnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + StoreValue(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { + RegLocation rl_result; + rl_src = LoadValueWide(rl_src, kFPReg); + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + StoreValueWide(rl_dest, rl_result); +} + +bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) { + DCHECK_EQ(cu_->instruction_set, kThumb2); + LIR *branch; + RegLocation rl_src = info->args[0]; + RegLocation rl_dest = InlineTargetWide(info); // double place for result + rl_src = LoadValueWide(rl_src, kFPReg); + RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + NewLIR0(kThumb2Fmstat); + branch = NewLIR2(kThumbBCond, 0, kArmCondEq); + ClobberCallerSave(); + LockCallTemps(); // Using fixed registers + RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt)); + NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg()); + NewLIR1(kThumbBlxR, r_tgt.GetReg()); + NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg()); + branch->target = NewLIR0(kPseudoTargetLabel); + StoreValueWide(rl_dest, rl_result); + return true; +} + + +} // namespace art diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc new file mode 100644 index 000000000..11fb76571 --- /dev/null +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -0,0 +1,1460 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
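// The CMPL_*/CMPG_* lowering above preloads the result with the NaN-biased default
// (-1 for cmpl, +1 for cmpg) and only overwrites it when the vcmp flags prove
// "greater"/"less" or "equal", so unordered operands keep the bias. Equivalent
// logic as an illustrative sketch:
#include <cmath>

int CompareFloat(float a, float b, bool gt_bias /* true for cmpg */) {
  int result = gt_bias ? 1 : -1;        // LoadConstant(rl_result.reg, default_result)
  if (!std::isnan(a) && !std::isnan(b)) {
    if (gt_bias ? (a < b) : (a > b)) {  // IT mi / IT gt path: flip to the other extreme
      result = -result;
    }
    if (a == b) {                       // IT eq path
      result = 0;
    }
  }
  return result;
}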
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains codegen for the Thumb2 ISA. */ + +#include "arm64_lir.h" +#include "codegen_arm64.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "mirror/array.h" + +namespace art { + +LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) { + OpRegReg(kOpCmp, src1, src2); + return OpCondBranch(cond, target); +} + +/* + * Generate a Thumb2 IT instruction, which can nullify up to + * four subsequent instructions based on a condition and its + * inverse. The condition applies to the first instruction, which + * is executed if the condition is met. The string "guide" consists + * of 0 to 3 chars, and applies to the 2nd through 4th instruction. + * A "T" means the instruction is executed if the condition is + * met, and an "E" means the instruction is executed if the condition + * is not met. + */ +LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) { + int mask; + int mask3 = 0; + int mask2 = 0; + int mask1 = 0; + ArmConditionCode code = ArmConditionEncoding(ccode); + int cond_bit = code & 1; + int alt_bit = cond_bit ^ 1; + + // Note: case fallthroughs intentional + switch (strlen(guide)) { + case 3: + mask1 = (guide[2] == 'T') ? cond_bit : alt_bit; + case 2: + mask2 = (guide[1] == 'T') ? cond_bit : alt_bit; + case 1: + mask3 = (guide[0] == 'T') ? cond_bit : alt_bit; + break; + case 0: + break; + default: + LOG(FATAL) << "OAT: bad case in OpIT"; + } + mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) | + (1 << (3 - strlen(guide))); + return NewLIR2(kThumb2It, code, mask); +} + +void Arm64Mir2Lir::OpEndIT(LIR* it) { + // TODO: use the 'it' pointer to do some checks with the LIR, for example + // we could check that the number of instructions matches the mask + // in the IT instruction. + CHECK(it != nullptr); + GenBarrier(); +} + +/* + * 64-bit 3way compare function. 
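// OpIT() above packs the Thumb2 IT-block mask as follows: each guide character
// contributes the condition's low bit ('T') or its complement ('E'), and a
// terminating 1 bit encodes the block length. A standalone sketch of the same
// packing (ArmConditionCode uses the ARM encoding, EQ=0, NE=1, ...):
#include <cassert>
#include <cstring>

int EncodeItMask(int arm_cond_code, const char* guide) {
  const int cond_bit = arm_cond_code & 1;
  const int alt_bit = cond_bit ^ 1;
  const size_t len = std::strlen(guide);
  assert(len <= 3);
  int mask = 1 << (3 - len);  // terminating bit marks how many slots are used
  for (size_t i = 0, bit = 3; i < len; ++i, --bit) {
    mask |= ((guide[i] == 'T') ? cond_bit : alt_bit) << bit;
  }
  return mask;  // e.g. EncodeItMask(/*kArmCondEq*/ 0, "E") == 0b1100, i.e. "ITE EQ"
}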
+ * mov rX, #-1 + * cmp op1hi, op2hi + * blt done + * bgt flip + * sub rX, op1lo, op2lo (treat as unsigned) + * beq done + * ite hi + * mov(hi) rX, #-1 + * mov(!hi) rX, #1 + * flip: + * neg rX + * done: + */ +void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + LIR* target1; + LIR* target2; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + RegStorage t_reg = AllocTemp(); + LoadConstant(t_reg, -1); + OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh()); + LIR* branch1 = OpCondBranch(kCondLt, NULL); + LIR* branch2 = OpCondBranch(kCondGt, NULL); + OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow()); + LIR* branch3 = OpCondBranch(kCondEq, NULL); + + LIR* it = OpIT(kCondHi, "E"); + NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1)); + LoadConstant(t_reg, 1); + OpEndIT(it); + + target2 = NewLIR0(kPseudoTargetLabel); + OpRegReg(kOpNeg, t_reg, t_reg); + + target1 = NewLIR0(kPseudoTargetLabel); + + RegLocation rl_temp = LocCReturn(); // Just using as template, will change + rl_temp.reg.SetReg(t_reg.GetReg()); + StoreValue(rl_dest, rl_temp); + FreeTemp(t_reg); + + branch1->target = target1; + branch2->target = target2; + branch3->target = branch1->target; +} + +void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, + int64_t val, ConditionCode ccode) { + int32_t val_lo = Low32Bits(val); + int32_t val_hi = High32Bits(val); + DCHECK_GE(ModifiedImmediate(val_lo), 0); + DCHECK_GE(ModifiedImmediate(val_hi), 0); + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + RegStorage low_reg = rl_src1.reg.GetLow(); + RegStorage high_reg = rl_src1.reg.GetHigh(); + + if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { + RegStorage t_reg = AllocTemp(); + NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0); + FreeTemp(t_reg); + OpCondBranch(ccode, taken); + return; + } + + switch (ccode) { + case kCondEq: + case kCondNe: + OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken); + break; + case kCondLt: + OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); + OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); + ccode = kCondUlt; + break; + case kCondLe: + OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); + OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); + ccode = kCondLs; + break; + case kCondGt: + OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); + OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); + ccode = kCondHi; + break; + case kCondGe: + OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); + OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); + ccode = kCondUge; + break; + default: + LOG(FATAL) << "Unexpected ccode: " << ccode; + } + OpCmpImmBranch(ccode, low_reg, val_lo, taken); +} + +void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { + RegLocation rl_result; + RegLocation rl_src = mir_graph_->GetSrc(mir, 0); + RegLocation rl_dest = mir_graph_->GetDest(mir); + rl_src = LoadValue(rl_src, kCoreReg); + ConditionCode ccode = mir->meta.ccode; + if (mir->ssa_rep->num_uses == 1) { + // CONST case + int true_val = mir->dalvikInsn.vB; + int false_val = mir->dalvikInsn.vC; + rl_result = EvalLoc(rl_dest, kCoreReg, true); + // Change kCondNe to kCondEq for the special cases below. 
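// The three-way long compare implemented by GenCmpLong() above, in plain C++: high
// words compare signed, and only when they are equal do the low words decide,
// compared unsigned (the "treat as unsigned" subtract and the ITE HI part).
#include <cstdint>

int CmpLong(int64_t a, int64_t b) {
  const int32_t a_hi = static_cast<int32_t>(a >> 32);
  const int32_t b_hi = static_cast<int32_t>(b >> 32);
  if (a_hi < b_hi) return -1;  // blt done: rX keeps the preloaded -1
  if (a_hi > b_hi) return 1;   // bgt flip: the -1 gets negated
  const uint32_t a_lo = static_cast<uint32_t>(a);
  const uint32_t b_lo = static_cast<uint32_t>(b);
  if (a_lo == b_lo) return 0;  // beq done: the subtract already produced 0
  return (a_lo > b_lo) ? 1 : -1;
}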
+ if (ccode == kCondNe) { + ccode = kCondEq; + std::swap(true_val, false_val); + } + bool cheap_false_val = InexpensiveConstantInt(false_val); + if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) { + OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val); + DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, ""); + LoadConstant(rl_result.reg, false_val); + OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + } else if (cheap_false_val && ccode == kCondEq && true_val == 1) { + OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1); + DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + LIR* it = OpIT(kCondLs, ""); + LoadConstant(rl_result.reg, false_val); + OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + } else if (cheap_false_val && InexpensiveConstantInt(true_val)) { + OpRegImm(kOpCmp, rl_src.reg, 0); + LIR* it = OpIT(ccode, "E"); + LoadConstant(rl_result.reg, true_val); + LoadConstant(rl_result.reg, false_val); + OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + } else { + // Unlikely case - could be tuned. + RegStorage t_reg1 = AllocTemp(); + RegStorage t_reg2 = AllocTemp(); + LoadConstant(t_reg1, true_val); + LoadConstant(t_reg2, false_val); + OpRegImm(kOpCmp, rl_src.reg, 0); + LIR* it = OpIT(ccode, "E"); + OpRegCopy(rl_result.reg, t_reg1); + OpRegCopy(rl_result.reg, t_reg2); + OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + } + } else { + // MOVE case + RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; + RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; + rl_true = LoadValue(rl_true, kCoreReg); + rl_false = LoadValue(rl_false, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegImm(kOpCmp, rl_src.reg, 0); + LIR* it = nullptr; + if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { // Is the "true" case already in place? + it = OpIT(NegateComparison(ccode), ""); + OpRegCopy(rl_result.reg, rl_false.reg); + } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { // False case in place? + it = OpIT(ccode, ""); + OpRegCopy(rl_result.reg, rl_true.reg); + } else { // Normal - select between the two. + it = OpIT(ccode, "E"); + OpRegCopy(rl_result.reg, rl_true.reg); + OpRegCopy(rl_result.reg, rl_false.reg); + } + OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact + } + StoreValue(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { + RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); + RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); + // Normalize such that if either operand is constant, src2 will be constant. + ConditionCode ccode = mir->meta.ccode; + if (rl_src1.is_const) { + std::swap(rl_src1, rl_src2); + ccode = FlipComparisonOrder(ccode); + } + if (rl_src2.is_const) { + RegLocation rl_temp = UpdateLocWide(rl_src2); + // Do special compare/branch against simple const operand if not already in registers. 
+ int64_t val = mir_graph_->ConstantValueWide(rl_src2); + if ((rl_temp.location != kLocPhysReg) && + ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) { + GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); + return; + } + } + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh()); + switch (ccode) { + case kCondEq: + OpCondBranch(kCondNe, not_taken); + break; + case kCondNe: + OpCondBranch(kCondNe, taken); + break; + case kCondLt: + OpCondBranch(kCondLt, taken); + OpCondBranch(kCondGt, not_taken); + ccode = kCondUlt; + break; + case kCondLe: + OpCondBranch(kCondLt, taken); + OpCondBranch(kCondGt, not_taken); + ccode = kCondLs; + break; + case kCondGt: + OpCondBranch(kCondGt, taken); + OpCondBranch(kCondLt, not_taken); + ccode = kCondHi; + break; + case kCondGe: + OpCondBranch(kCondGt, taken); + OpCondBranch(kCondLt, not_taken); + ccode = kCondUge; + break; + default: + LOG(FATAL) << "Unexpected ccode: " << ccode; + } + OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow()); + OpCondBranch(ccode, taken); +} + +/* + * Generate a register comparison to an immediate and branch. Caller + * is responsible for setting branch target field. + */ +LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) { + LIR* branch; + ArmConditionCode arm_cond = ArmConditionEncoding(cond); + /* + * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit + * compare-and-branch if zero is ideal if it will reach. However, because null checks + * branch forward to a slow path, they will frequently not reach - and thus have to + * be converted to a long form during assembly (which will trigger another assembly + * pass). Here we estimate the branch distance for checks, and if large directly + * generate the long form in an attempt to avoid an extra assembly pass. + * TODO: consider interspersing slowpaths in code following unconditional branches. + */ + bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget)); + skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64); + if (!skip && reg.Low8() && (check_value == 0) && + ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) { + branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz, + reg.GetReg(), 0); + } else { + OpRegImm(kOpCmp, reg, check_value); + branch = NewLIR2(kThumbBCond, 0, arm_cond); + } + branch->target = target; + return branch; +} + +LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { + LIR* res; + int opcode; + // If src or dest is a pair, we'll be using low reg. 
+ if (r_dest.IsPair()) { + r_dest = r_dest.GetLow(); + } + if (r_src.IsPair()) { + r_src = r_src.GetLow(); + } + if (r_dest.IsFloat() || r_src.IsFloat()) + return OpFpRegCopy(r_dest, r_src); + if (r_dest.Low8() && r_src.Low8()) + opcode = kThumbMovRR; + else if (!r_dest.Low8() && !r_src.Low8()) + opcode = kThumbMovRR_H2H; + else if (r_dest.Low8()) + opcode = kThumbMovRR_H2L; + else + opcode = kThumbMovRR_L2H; + res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg()); + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { + res->flags.is_nop = true; + } + return res; +} + +void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) { + if (r_dest != r_src) { + LIR* res = OpRegCopyNoInsert(r_dest, r_src); + AppendLIR(res); + } +} + +void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { + if (r_dest != r_src) { + bool dest_fp = r_dest.IsFloat(); + bool src_fp = r_src.IsFloat(); + DCHECK(r_dest.Is64Bit()); + DCHECK(r_src.Is64Bit()); + if (dest_fp) { + if (src_fp) { + OpRegCopy(r_dest, r_src); + } else { + NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg()); + } + } else { + if (src_fp) { + NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg()); + } else { + // Handle overlap + if (r_src.GetHighReg() == r_dest.GetLowReg()) { + DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg()); + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + } else { + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + } + } + } + } +} + +// Table of magic divisors +struct MagicTable { + uint32_t magic; + uint32_t shift; + DividePattern pattern; +}; + +static const MagicTable magic_table[] = { + {0, 0, DivideNone}, // 0 + {0, 0, DivideNone}, // 1 + {0, 0, DivideNone}, // 2 + {0x55555556, 0, Divide3}, // 3 + {0, 0, DivideNone}, // 4 + {0x66666667, 1, Divide5}, // 5 + {0x2AAAAAAB, 0, Divide3}, // 6 + {0x92492493, 2, Divide7}, // 7 + {0, 0, DivideNone}, // 8 + {0x38E38E39, 1, Divide5}, // 9 + {0x66666667, 2, Divide5}, // 10 + {0x2E8BA2E9, 1, Divide5}, // 11 + {0x2AAAAAAB, 1, Divide5}, // 12 + {0x4EC4EC4F, 2, Divide5}, // 13 + {0x92492493, 3, Divide7}, // 14 + {0x88888889, 3, Divide7}, // 15 +}; + +// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4) +bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, + RegLocation rl_src, RegLocation rl_dest, int lit) { + if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) { + return false; + } + DividePattern pattern = magic_table[lit].pattern; + if (pattern == DivideNone) { + return false; + } + + RegStorage r_magic = AllocTemp(); + LoadConstant(r_magic, magic_table[lit].magic); + rl_src = LoadValue(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage r_hi = AllocTemp(); + RegStorage r_lo = AllocTemp(); + + // rl_dest and rl_src might overlap. + // Reuse r_hi to save the div result for reminder case. + RegStorage r_div_result = is_div ? 
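// SmallLiteralDivRem() above implements division by a small constant as a
// reciprocal multiply (Hacker's Delight 10-4): take the high word of a 32x32
// signed multiply by the table's "magic" value, shift, and correct by the sign of
// the dividend. Worked sketch for lit == 3, table entry {0x55555556, 0, Divide3}:
#include <cstdint>

int32_t DivideBy3(int32_t n) {
  const int64_t product = static_cast<int64_t>(n) * INT64_C(0x55555556);  // kThumb2Smull
  const int32_t hi = static_cast<int32_t>(product >> 32);                 // r_hi
  return hi - (n >> 31);  // Divide3: sub r_div_result, r_hi, n ASR #31
}
// DivideBy3(7) == 2 and DivideBy3(-7) == -2, i.e. it truncates toward zero like sdiv.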
rl_result.reg : r_hi; + + NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg()); + switch (pattern) { + case Divide3: + OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31)); + break; + case Divide5: + OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); + OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi, + EncodeShift(kArmAsr, magic_table[lit].shift)); + break; + case Divide7: + OpRegReg(kOpAdd, r_hi, rl_src.reg); + OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31); + OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi, + EncodeShift(kArmAsr, magic_table[lit].shift)); + break; + default: + LOG(FATAL) << "Unexpected pattern: " << pattern; + } + + if (!is_div) { + // div_result = src / lit + // tmp1 = div_result * lit + // dest = src - tmp1 + RegStorage tmp1 = r_lo; + EasyMultiplyOp ops[2]; + + bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops); + DCHECK_NE(canEasyMultiply, false); + + GenEasyMultiplyTwoOps(tmp1, r_div_result, ops); + OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1); + } + + StoreValue(rl_dest, rl_result); + return true; +} + +// Try to convert *lit to 1 RegRegRegShift/RegRegShift form. +bool Arm64Mir2Lir::GetEasyMultiplyOp(int lit, Arm64Mir2Lir::EasyMultiplyOp* op) { + if (IsPowerOfTwo(lit)) { + op->op = kOpLsl; + op->shift = LowestSetBit(lit); + return true; + } + + if (IsPowerOfTwo(lit - 1)) { + op->op = kOpAdd; + op->shift = LowestSetBit(lit - 1); + return true; + } + + if (IsPowerOfTwo(lit + 1)) { + op->op = kOpRsub; + op->shift = LowestSetBit(lit + 1); + return true; + } + + op->op = kOpInvalid; + op->shift = 0; + return false; +} + +// Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms. +bool Arm64Mir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) { + GetEasyMultiplyOp(lit, &ops[0]); + if (GetEasyMultiplyOp(lit, &ops[0])) { + ops[1].op = kOpInvalid; + ops[1].shift = 0; + return true; + } + + int lit1 = lit; + uint32_t shift = LowestSetBit(lit1); + if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { + ops[1].op = kOpLsl; + ops[1].shift = shift; + return true; + } + + lit1 = lit - 1; + shift = LowestSetBit(lit1); + if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { + ops[1].op = kOpAdd; + ops[1].shift = shift; + return true; + } + + lit1 = lit + 1; + shift = LowestSetBit(lit1); + if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) { + ops[1].op = kOpRsub; + ops[1].shift = shift; + return true; + } + + return false; +} + +// Generate instructions to do multiply. +// Additional temporary register is required, +// if it need to generate 2 instructions and src/dest overlap. 
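// GetEasyMultiplyTwoOps()/GenEasyMultiplyTwoOps() here replace a multiply by a
// small literal with at most two shift-and-accumulate steps. For lit == 10 the
// search yields ops[0] = {kOpAdd, 2} and ops[1] = {kOpLsl, 1}; a sketch of the
// resulting arithmetic:
#include <cstdint>

int32_t MultiplyBy10(int32_t x) {
  const int32_t tmp1 = x + (x << 2);  // ops[0]: kOpAdd, shift 2  -> x * 5
  return tmp1 << 1;                   // ops[1]: kOpLsl, shift 1  -> x * 10
}
// MultiplyBy10(7) == 70; src/dest overlap is why an extra temp may be allocated.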
+void Arm64Mir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) { + // tmp1 = ( src << shift1) + [ src | -src | 0 ] + // dest = (tmp1 << shift2) + [ src | -src | 0 ] + + RegStorage r_tmp1; + if (ops[1].op == kOpInvalid) { + r_tmp1 = r_dest; + } else if (r_dest.GetReg() != r_src.GetReg()) { + r_tmp1 = r_dest; + } else { + r_tmp1 = AllocTemp(); + } + + switch (ops[0].op) { + case kOpLsl: + OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift); + break; + case kOpAdd: + OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift)); + break; + case kOpRsub: + OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift)); + break; + default: + DCHECK_EQ(ops[0].op, kOpInvalid); + break; + } + + switch (ops[1].op) { + case kOpInvalid: + return; + case kOpLsl: + OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift); + break; + case kOpAdd: + OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift)); + break; + case kOpRsub: + OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift)); + break; + default: + LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps"; + break; + } +} + +bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) { + EasyMultiplyOp ops[2]; + + if (!GetEasyMultiplyTwoOps(lit, ops)) { + return false; + } + + rl_src = LoadValue(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + + GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops); + StoreValue(rl_dest, rl_result); + return true; +} + +RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2, bool is_div, bool check_zero) { + LOG(FATAL) << "Unexpected use of GenDivRem for Arm"; + return rl_dest; +} + +RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) { + LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm"; + return rl_dest; +} + +RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) { + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + + // Put the literal in a temp. + RegStorage lit_temp = AllocTemp(); + LoadConstant(lit_temp, lit); + // Use the generic case for div/rem with arg2 in a register. + // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure. + rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div); + FreeTemp(lit_temp); + + return rl_result; +} + +RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2, + bool is_div) { + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (is_div) { + // Simple case, use sdiv instruction. 
+ OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2); + } else { + // Remainder case, use the following code: + // temp = reg1 / reg2 - integer division + // temp = temp * reg2 + // dest = reg1 - temp + + RegStorage temp = AllocTemp(); + OpRegRegReg(kOpDiv, temp, reg1, reg2); + OpRegReg(kOpMul, temp, reg2); + OpRegRegReg(kOpSub, rl_result.reg, reg1, temp); + FreeTemp(temp); + } + + return rl_result; +} + +bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { + DCHECK_EQ(cu_->instruction_set, kThumb2); + RegLocation rl_src1 = info->args[0]; + RegLocation rl_src2 = info->args[1]; + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src2 = LoadValue(rl_src2, kCoreReg); + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); + LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E"); + OpRegReg(kOpMov, rl_result.reg, rl_src2.reg); + OpRegReg(kOpMov, rl_result.reg, rl_src1.reg); + OpEndIT(it); + StoreValue(rl_dest, rl_result); + return true; +} + +bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (size == k64) { + // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0. + if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) { + Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow()); + Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh()); + } else { + Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh()); + Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow()); + } + StoreValueWide(rl_dest, rl_result); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); + // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. + LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG); + StoreValue(rl_dest, rl_result); + } + return true; +} + +bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] + RegLocation rl_src_value = info->args[2]; // [size] value + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + if (size == k64) { + // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0. + RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); + StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32); + StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); + // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0. 
+ RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); + StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size); + } + return true; +} + +void Arm64Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) { + LOG(FATAL) << "Unexpected use of OpLea for Arm"; +} + +void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) { + LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm"; +} + +bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { + DCHECK_EQ(cu_->instruction_set, kThumb2); + // Unused - RegLocation rl_src_unsafe = info->args[0]; + RegLocation rl_src_obj = info->args[1]; // Object - known non-null + RegLocation rl_src_offset = info->args[2]; // long low + rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] + RegLocation rl_src_expected = info->args[4]; // int, long or Object + // If is_long, high half is in info->args[5] + RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object + // If is_long, high half is in info->args[7] + RegLocation rl_dest = InlineTarget(info); // boolean place for result + + // We have only 5 temporary registers available and actually only 4 if the InlineTarget + // above locked one of the temps. For a straightforward CAS64 we need 7 registers: + // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor + // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop + // into the same temps, reducing the number of required temps down to 5. We shall work + // around the potentially locked temp by using LR for r_ptr, unconditionally. + // TODO: Pass information about the need for more temps to the stack frame generation + // code so that we can rely on being able to allocate enough temps. + DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp()); + MarkTemp(rs_rARM_LR); + FreeTemp(rs_rARM_LR); + LockTemp(rs_rARM_LR); + bool load_early = true; + if (is_long) { + RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() : + rl_src_expected.reg; + RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() : + rl_src_new_value.reg; + bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat(); + bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat(); + bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg); + bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg); + + if (!expected_is_good_reg && !new_value_is_good_reg) { + // None of expected/new_value is non-temp reg, need to load both late + load_early = false; + // Make sure they are not in the temp regs and the load will not be skipped. + if (expected_is_core_reg) { + FlushRegWide(rl_src_expected.reg); + ClobberSReg(rl_src_expected.s_reg_low); + ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low)); + rl_src_expected.location = kLocDalvikFrame; + } + if (new_value_is_core_reg) { + FlushRegWide(rl_src_new_value.reg); + ClobberSReg(rl_src_new_value.s_reg_low); + ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low)); + rl_src_new_value.location = kLocDalvikFrame; + } + } + } + + // Release store semantics, get the barrier out of the way. 
TODO: revisit + GenMemBarrier(kStoreLoad); + + RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_new_value; + if (!is_long) { + rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + } else if (load_early) { + rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); + } + + if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { + // Mark card for object assuming new value is stored. + MarkGCCard(rl_new_value.reg, rl_object.reg); + } + + RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); + + RegStorage r_ptr = rs_rARM_LR; + OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg); + + // Free now unneeded rl_object and rl_offset to give more temps. + ClobberSReg(rl_object.s_reg_low); + FreeTemp(rl_object.reg); + ClobberSReg(rl_offset.s_reg_low); + FreeTemp(rl_offset.reg); + + RegLocation rl_expected; + if (!is_long) { + rl_expected = LoadValue(rl_src_expected, kCoreReg); + } else if (load_early) { + rl_expected = LoadValueWide(rl_src_expected, kCoreReg); + } else { + // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs. + RegStorage low_reg = AllocTemp(); + RegStorage high_reg = AllocTemp(); + rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg); + rl_expected = rl_new_value; + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + // result = tmp != 0; + + RegStorage r_tmp = AllocTemp(); + LIR* target = NewLIR0(kPseudoTargetLabel); + + LIR* it = nullptr; + if (is_long) { + RegStorage r_tmp_high = AllocTemp(); + if (!load_early) { + LoadValueDirectWide(rl_src_expected, rl_expected.reg); + } + NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg()); + OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow()); + OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh()); + if (!load_early) { + LoadValueDirectWide(rl_src_new_value, rl_new_value.reg); + } + // Make sure we use ORR that sets the ccode + if (r_tmp.Low8() && r_tmp_high.Low8()) { + NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg()); + } else { + NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0); + } + FreeTemp(r_tmp_high); // Now unneeded + + DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + it = OpIT(kCondEq, "T"); + NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg()); + + } else { + NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0); + OpRegReg(kOpSub, r_tmp, rl_expected.reg); + DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + it = OpIT(kCondEq, "T"); + NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); + } + + // Still one conditional left from OpIT(kCondEq, "T") from either branch + OpRegImm(kOpCmp /* eq */, r_tmp, 1); + OpEndIT(it); + + OpCondBranch(kCondEq, target); + + if (!load_early) { + FreeTemp(rl_expected.reg); // Now unneeded. + } + + // result := (tmp1 != 0) ? 0 : 1; + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1); + DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + it = OpIT(kCondUlt, ""); + LoadConstant(rl_result.reg, 0); /* cc */ + FreeTemp(r_tmp); // Now unneeded. + OpEndIT(it); // Barrier to terminate OpIT. + + StoreValue(rl_dest, rl_result); + + // Now, restore lr to its non-temp status. 
+ Clobber(rs_rARM_LR); + UnmarkTemp(rs_rARM_LR); + return true; +} + +LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { + return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target); +} + +LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) { + return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count); +} + +LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) { + return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count); +} + +void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, + RegLocation rl_result, int lit, + int first_bit, int second_bit) { + OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg, + EncodeShift(kArmLsl, second_bit - first_bit)); + if (first_bit != 0) { + OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit); + } +} + +void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) { + DCHECK(reg.IsPair()); // TODO: support k64BitSolo. + RegStorage t_reg = AllocTemp(); + NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0); + FreeTemp(t_reg); + GenDivZeroCheck(kCondEq); +} + +// Test suspend flag, return target of taken suspend branch +LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) { + NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1); + return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target); +} + +// Decrement register and branch on condition +LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { + // Combine sub & test using sub setflags encoding here + OpRegRegImm(kOpSub, reg, reg, 1); // For value == 1, this should set flags. + DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + return OpCondBranch(c_code, target); +} + +void Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { +#if ANDROID_SMP != 0 + // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one. + LIR* barrier = last_lir_insn_; + + int dmb_flavor; + // TODO: revisit Arm barrier kinds + switch (barrier_kind) { + case kLoadStore: dmb_flavor = kISH; break; + case kLoadLoad: dmb_flavor = kISH; break; + case kStoreStore: dmb_flavor = kISHST; break; + case kStoreLoad: dmb_flavor = kISH; break; + default: + LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind; + dmb_flavor = kSY; // quiet gcc. + break; + } + + // If the same barrier already exists, don't generate another. + if (barrier == nullptr + || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) { + barrier = NewLIR1(kThumb2Dmb, dmb_flavor); + } + + // At this point we must have a memory barrier. Mark it as a scheduling barrier as well. 
+  DCHECK(!barrier->flags.use_def_invalid);
+  barrier->u.m.def_mask = ENCODE_ALL;
+#endif
+}
+
+void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegStorage z_reg = AllocTemp();
+  LoadConstantNoClobber(z_reg, 0);
+  // Check for destructive overlap
+  if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
+    RegStorage t_reg = AllocTemp();
+    OpRegCopy(t_reg, rl_src.reg.GetHigh());
+    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
+    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
+    FreeTemp(t_reg);
+  } else {
+    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
+    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
+  }
+  FreeTemp(z_reg);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                              RegLocation rl_src1, RegLocation rl_src2) {
+  /*
+   * tmp1 = src1.hi * src2.lo;  // src1.hi is no longer needed
+   * dest = src1.lo * src2.lo;
+   * tmp1 += src1.lo * src2.hi;
+   * dest.hi += tmp1;
+   *
+   * To pull off inline multiply, we have a worst-case requirement of 7 temporary
+   * registers. Normally for Arm, we get 5. We can get to 6 by including
+   * lr in the temp set. The only problematic case is all operands and result are
+   * distinct, and none have been promoted. In that case, we can succeed by aggressively
+   * freeing operand temp registers after they are no longer needed. All other cases
+   * can proceed normally. We'll just punt on the case of the result having a misaligned
+   * overlap with either operand and send that case to a runtime handler.
+   */
+  RegLocation rl_result;
+  if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) {
+    ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
+    FlushAllRegs();
+    CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
+    rl_result = GetReturnWide(false);
+    StoreValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+
+  int reg_status = 0;
+  RegStorage res_lo;
+  RegStorage res_hi;
+  bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
+      !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh());
+  bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh());
+  bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh());
+  // Check if rl_dest is *not* either operand and we have enough temp registers.
+  if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
+      (dest_promoted || src1_promoted || src2_promoted)) {
+    // In this case, we do not need to manually allocate temp registers for result.
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    res_lo = rl_result.reg.GetLow();
+    res_hi = rl_result.reg.GetHigh();
+  } else {
+    res_lo = AllocTemp();
+    if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
+      // In this case, we have enough temp registers to be allocated for result.
+      res_hi = AllocTemp();
+      reg_status = 1;
+    } else {
+      // In this case, all temps are now allocated.
+      // res_hi will be allocated after we can free src1_hi.
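+      // (reg_status recap, for reference: 0 = the result lives in rl_dest's own registers,
+      //  1 = both result temps were allocated up front, 2 = res_hi is deferred until
+      //  src1.hi can be freed inside the multiply sequence below.)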
+ reg_status = 2; + } + } + + // Temporarily add LR to the temp pool, and assign it to tmp1 + MarkTemp(rs_rARM_LR); + FreeTemp(rs_rARM_LR); + RegStorage tmp1 = rs_rARM_LR; + LockTemp(rs_rARM_LR); + + if (rl_src1.reg == rl_src2.reg) { + DCHECK(res_hi.Valid()); + DCHECK(res_lo.Valid()); + NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()); + NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(), + rl_src1.reg.GetLowReg()); + OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1)); + } else { + NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg()); + if (reg_status == 2) { + DCHECK(!res_hi.Valid()); + DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg()); + DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg()); + FreeTemp(rl_src1.reg.GetHigh()); + res_hi = AllocTemp(); + } + DCHECK(res_hi.Valid()); + DCHECK(res_lo.Valid()); + NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(), + rl_src1.reg.GetLowReg()); + NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(), + tmp1.GetReg()); + NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0); + if (reg_status == 2) { + // Clobber rl_src1 since it was corrupted. + FreeTemp(rl_src1.reg); + Clobber(rl_src1.reg); + } + } + + // Now, restore lr to its non-temp status. + FreeTemp(tmp1); + Clobber(rs_rARM_LR); + UnmarkTemp(rs_rARM_LR); + + if (reg_status != 0) { + // We had manually allocated registers for rl_result. + // Now construct a RegLocation. + rl_result = GetReturnWide(false); // Just using as a template. + rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi); + } + + StoreValueWide(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + LOG(FATAL) << "Unexpected use of GenAddLong for Arm"; +} + +void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + LOG(FATAL) << "Unexpected use of GenSubLong for Arm"; +} + +void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + LOG(FATAL) << "Unexpected use of GenAndLong for Arm"; +} + +void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + LOG(FATAL) << "Unexpected use of GenOrLong for Arm"; +} + +void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) { + LOG(FATAL) << "Unexpected use of genXoLong for Arm"; +} + +/* + * Generate array load + */ +void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_dest, int scale) { + RegisterClass reg_class = RegClassBySize(size); + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset; + RegLocation rl_result; + bool constant_index = rl_index.is_const; + rl_array = LoadValue(rl_array, kCoreReg); + if (!constant_index) { + rl_index = LoadValue(rl_index, kCoreReg); + } + + if (rl_dest.wide) { + data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); + } else { + data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); + } + + // If index is constant, just fold it into the data offset + if (constant_index) { + data_offset += mir_graph_->ConstantValue(rl_index) << scale; + } + + /* null 
object? */ + GenNullCheck(rl_array.reg, opt_flags); + + bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); + RegStorage reg_len; + if (needs_range_check) { + reg_len = AllocTemp(); + /* Get len */ + Load32Disp(rl_array.reg, len_offset, reg_len); + MarkPossibleNullPointerException(opt_flags); + } else { + ForceImplicitNullCheck(rl_array.reg, opt_flags); + } + if (rl_dest.wide || rl_dest.fp || constant_index) { + RegStorage reg_ptr; + if (constant_index) { + reg_ptr = rl_array.reg; // NOTE: must not alter reg_ptr in constant case. + } else { + // No special indexed operation, lea + load w/ displacement + reg_ptr = AllocTemp(); + OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale)); + FreeTemp(rl_index.reg); + } + rl_result = EvalLoc(rl_dest, reg_class, true); + + if (needs_range_check) { + if (constant_index) { + GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len); + } else { + GenArrayBoundsCheck(rl_index.reg, reg_len); + } + FreeTemp(reg_len); + } + if (rl_dest.wide) { + LoadBaseDispWide(reg_ptr, data_offset, rl_result.reg, INVALID_SREG); + MarkPossibleNullPointerException(opt_flags); + if (!constant_index) { + FreeTemp(reg_ptr); + } + StoreValueWide(rl_dest, rl_result); + } else { + LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG); + MarkPossibleNullPointerException(opt_flags); + if (!constant_index) { + FreeTemp(reg_ptr); + } + StoreValue(rl_dest, rl_result); + } + } else { + // Offset base, then use indexed load + RegStorage reg_ptr = AllocTemp(); + OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset); + FreeTemp(rl_array.reg); + rl_result = EvalLoc(rl_dest, reg_class, true); + + if (needs_range_check) { + GenArrayBoundsCheck(rl_index.reg, reg_len); + FreeTemp(reg_len); + } + LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size); + MarkPossibleNullPointerException(opt_flags); + FreeTemp(reg_ptr); + StoreValue(rl_dest, rl_result); + } +} + +/* + * Generate array store + * + */ +void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { + RegisterClass reg_class = RegClassBySize(size); + int len_offset = mirror::Array::LengthOffset().Int32Value(); + bool constant_index = rl_index.is_const; + + int data_offset; + if (size == k64 || size == kDouble) { + data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); + } else { + data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); + } + + // If index is constant, just fold it into the data offset. + if (constant_index) { + data_offset += mir_graph_->ConstantValue(rl_index) << scale; + } + + rl_array = LoadValue(rl_array, kCoreReg); + if (!constant_index) { + rl_index = LoadValue(rl_index, kCoreReg); + } + + RegStorage reg_ptr; + bool allocated_reg_ptr_temp = false; + if (constant_index) { + reg_ptr = rl_array.reg; + } else if (IsTemp(rl_array.reg) && !card_mark) { + Clobber(rl_array.reg); + reg_ptr = rl_array.reg; + } else { + allocated_reg_ptr_temp = true; + reg_ptr = AllocTemp(); + } + + /* null object? */ + GenNullCheck(rl_array.reg, opt_flags); + + bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); + RegStorage reg_len; + if (needs_range_check) { + reg_len = AllocTemp(); + // NOTE: max live temps(4) here. 
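+    // (For illustration, the four are rl_array.reg, rl_index.reg, reg_ptr and reg_len in the
+    //  worst case where none of them could be reused.)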
+ /* Get len */ + Load32Disp(rl_array.reg, len_offset, reg_len); + MarkPossibleNullPointerException(opt_flags); + } else { + ForceImplicitNullCheck(rl_array.reg, opt_flags); + } + /* at this point, reg_ptr points to array, 2 live temps */ + if (rl_src.wide || rl_src.fp || constant_index) { + if (rl_src.wide) { + rl_src = LoadValueWide(rl_src, reg_class); + } else { + rl_src = LoadValue(rl_src, reg_class); + } + if (!constant_index) { + OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale)); + } + if (needs_range_check) { + if (constant_index) { + GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len); + } else { + GenArrayBoundsCheck(rl_index.reg, reg_len); + } + FreeTemp(reg_len); + } + + if (rl_src.wide) { + StoreBaseDispWide(reg_ptr, data_offset, rl_src.reg); + } else { + StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size); + } + MarkPossibleNullPointerException(opt_flags); + } else { + /* reg_ptr -> array data */ + OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset); + rl_src = LoadValue(rl_src, reg_class); + if (needs_range_check) { + GenArrayBoundsCheck(rl_index.reg, reg_len); + FreeTemp(reg_len); + } + StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size); + MarkPossibleNullPointerException(opt_flags); + } + if (allocated_reg_ptr_temp) { + FreeTemp(reg_ptr); + } + if (card_mark) { + MarkGCCard(rl_src.reg, rl_array.reg); + } +} + + +void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) { + rl_src = LoadValueWide(rl_src, kCoreReg); + // Per spec, we only care about low 6 bits of shift amount. + int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; + if (shift_amount == 0) { + StoreValueWide(rl_dest, rl_src); + return; + } + if (BadOverlap(rl_src, rl_dest)) { + GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); + return; + } + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + if (shift_amount == 1) { + OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow()); + OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh()); + } else if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg); + LoadConstant(rl_result.reg.GetLow(), 0); + } else if (shift_amount > 31) { + OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32); + LoadConstant(rl_result.reg.GetLow(), 0); + } else { + OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); + OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(), + EncodeShift(kArmLsr, 32 - shift_amount)); + OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount); + } + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31); + } else if (shift_amount > 31) { + OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32); + OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31); + } else { + RegStorage t_reg = AllocTemp(); + OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount); + OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(), + EncodeShift(kArmLsl, 32 - shift_amount)); + 
FreeTemp(t_reg); + OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); + } + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + LoadConstant(rl_result.reg.GetHigh(), 0); + } else if (shift_amount > 31) { + OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32); + LoadConstant(rl_result.reg.GetHigh(), 0); + } else { + RegStorage t_reg = AllocTemp(); + OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount); + OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(), + EncodeShift(kArmLsl, 32 - shift_amount)); + FreeTemp(t_reg); + OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount); + } + break; + default: + LOG(FATAL) << "Unexpected case"; + } + StoreValueWide(rl_dest, rl_result); +} + +void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) { + if (!rl_src2.is_const) { + // Don't bother with special handling for subtract from immediate. + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + return; + } + } else { + // Normalize + if (!rl_src2.is_const) { + DCHECK(rl_src1.is_const); + std::swap(rl_src1, rl_src2); + } + } + if (BadOverlap(rl_src1, rl_dest)) { + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + return; + } + DCHECK(rl_src2.is_const); + int64_t val = mir_graph_->ConstantValueWide(rl_src2); + uint32_t val_lo = Low32Bits(val); + uint32_t val_hi = High32Bits(val); + int32_t mod_imm_lo = ModifiedImmediate(val_lo); + int32_t mod_imm_hi = ModifiedImmediate(val_hi); + + // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit + switch (opcode) { + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + case Instruction::SUB_LONG: + case Instruction::SUB_LONG_2ADDR: + if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) { + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + return; + } + break; + default: + break; + } + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + // NOTE: once we've done the EvalLoc on dest, we can no longer bail. 
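+  // (Worked example, values assumed: for ADD_LONG with a constant of 0x0000000100000001,
+  //  both halves encode as modified immediate #1, so the switch below emits a flag-setting
+  //  add of #1 into the low word followed by an add-with-carry of #1 into the high word.)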
+ switch (opcode) { + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo); + NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi); + break; + case Instruction::OR_LONG: + case Instruction::OR_LONG_2ADDR: + if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) { + OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo); + } + if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) { + OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi); + } + break; + case Instruction::XOR_LONG: + case Instruction::XOR_LONG_2ADDR: + OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo); + OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi); + break; + case Instruction::AND_LONG: + case Instruction::AND_LONG_2ADDR: + if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) { + OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo); + } + if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) { + OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi); + } + break; + case Instruction::SUB_LONG_2ADDR: + case Instruction::SUB_LONG: + NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo); + NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi); + break; + default: + LOG(FATAL) << "Unexpected opcode " << opcode; + } + StoreValueWide(rl_dest, rl_result); +} + +} // namespace art diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc new file mode 100644 index 000000000..233e9c2fb --- /dev/null +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -0,0 +1,803 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "codegen_arm64.h" + +#include <inttypes.h> + +#include <string> + +#include "dex/compiler_internals.h" +#include "dex/quick/mir_to_lir-inl.h" + +namespace art { + +// TODO: rework this when c++11 support allows. 
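+// (Sketch of the eventual c++11 form, illustrative only: each arr/vector pair below could
+//  collapse into a single brace-initialized constant, e.g.
+//    static const std::vector<RegStorage> core_regs = {rs_r0, rs_r1, ..., rs_rARM_PC};
+//  until then, the sizeof-based vector constructors further down do the same job.)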
+static const RegStorage core_regs_arr[] = + {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF, + rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; +static const RegStorage sp_regs_arr[] = + {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, + rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20, + rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30, + rs_fr31}; +static const RegStorage dp_regs_arr[] = + {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10, + rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15}; +static const RegStorage reserved_regs_arr[] = + {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; +static const RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12}; +static const RegStorage sp_temps_arr[] = + {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, + rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15}; +static const RegStorage dp_temps_arr[] = + {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7}; + +static const std::vector<RegStorage> core_regs(core_regs_arr, + core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0])); +static const std::vector<RegStorage> sp_regs(sp_regs_arr, + sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0])); +static const std::vector<RegStorage> dp_regs(dp_regs_arr, + dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0])); +static const std::vector<RegStorage> reserved_regs(reserved_regs_arr, + reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0])); +static const std::vector<RegStorage> core_temps(core_temps_arr, + core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0])); +static const std::vector<RegStorage> sp_temps(sp_temps_arr, + sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0])); +static const std::vector<RegStorage> dp_temps(dp_temps_arr, + dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0])); + +RegLocation Arm64Mir2Lir::LocCReturn() { + return arm_loc_c_return; +} + +RegLocation Arm64Mir2Lir::LocCReturnWide() { + return arm_loc_c_return_wide; +} + +RegLocation Arm64Mir2Lir::LocCReturnFloat() { + return arm_loc_c_return_float; +} + +RegLocation Arm64Mir2Lir::LocCReturnDouble() { + return arm_loc_c_return_double; +} + +// Return a target-dependent special register. 
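+// (For example, with the current ARM-style assignments below, TargetReg(kArg0) and
+//  TargetReg(kRet0) both resolve to r0, and kInvokeTgt resolves to lr.)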
+RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { + RegStorage res_reg = RegStorage::InvalidReg(); + switch (reg) { + case kSelf: res_reg = rs_rARM_SELF; break; + case kSuspend: res_reg = rs_rARM_SUSPEND; break; + case kLr: res_reg = rs_rARM_LR; break; + case kPc: res_reg = rs_rARM_PC; break; + case kSp: res_reg = rs_rARM_SP; break; + case kArg0: res_reg = rs_r0; break; + case kArg1: res_reg = rs_r1; break; + case kArg2: res_reg = rs_r2; break; + case kArg3: res_reg = rs_r3; break; + case kFArg0: res_reg = rs_r0; break; + case kFArg1: res_reg = rs_r1; break; + case kFArg2: res_reg = rs_r2; break; + case kFArg3: res_reg = rs_r3; break; + case kRet0: res_reg = rs_r0; break; + case kRet1: res_reg = rs_r1; break; + case kInvokeTgt: res_reg = rs_rARM_LR; break; + case kHiddenArg: res_reg = rs_r12; break; + case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; + case kCount: res_reg = RegStorage::InvalidReg(); break; + } + return res_reg; +} + +RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { + // For the 32-bit internal ABI, the first 3 arguments are passed in registers. + switch (arg_num) { + case 0: + return rs_r1; + case 1: + return rs_r2; + case 2: + return rs_r3; + default: + return RegStorage::InvalidReg(); + } +} + +/* + * Decode the register id. + */ +uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) { + uint64_t seed; + int shift; + int reg_id = reg.GetRegNum(); + /* Each double register is equal to a pair of single-precision FP registers */ + if (reg.IsDouble()) { + seed = 0x3; + reg_id = reg_id << 1; + } else { + seed = 1; + } + /* FP register starts at bit position 16 */ + shift = reg.IsFloat() ? kArmFPReg0 : 0; + /* Expand the double register id into single offset */ + shift += reg_id; + return (seed << shift); +} + +uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() { + return ENCODE_ARM_REG_PC; +} + +// Thumb2 specific setup. TODO: inline?: +void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { + DCHECK_EQ(cu_->instruction_set, kThumb2); + DCHECK(!lir->flags.use_def_invalid); + + int opcode = lir->opcode; + + // These flags are somewhat uncommon - bypass if we can. 
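+  // (e.g. an ordinary ALU LIR with none of these flags skips the whole block below, while a
+  //  kThumbPush whose register list includes r8 also takes the push/pop fixup at the end.)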
+ if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 | + REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 | + REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) { + if (flags & REG_DEF_SP) { + lir->u.m.def_mask |= ENCODE_ARM_REG_SP; + } + + if (flags & REG_USE_SP) { + lir->u.m.use_mask |= ENCODE_ARM_REG_SP; + } + + if (flags & REG_DEF_LIST0) { + lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + } + + if (flags & REG_DEF_LIST1) { + lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + } + + if (flags & REG_DEF_FPCS_LIST0) { + lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + } + + if (flags & REG_DEF_FPCS_LIST2) { + for (int i = 0; i < lir->operands[2]; i++) { + SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i); + } + } + + if (flags & REG_USE_PC) { + lir->u.m.use_mask |= ENCODE_ARM_REG_PC; + } + + /* Conservatively treat the IT block */ + if (flags & IS_IT) { + lir->u.m.def_mask = ENCODE_ALL; + } + + if (flags & REG_USE_LIST0) { + lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + } + + if (flags & REG_USE_LIST1) { + lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + } + + if (flags & REG_USE_FPCS_LIST0) { + lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + } + + if (flags & REG_USE_FPCS_LIST2) { + for (int i = 0; i < lir->operands[2]; i++) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i); + } + } + /* Fixup for kThumbPush/lr and kThumbPop/pc */ + if (opcode == kThumbPush || opcode == kThumbPop) { + uint64_t r8Mask = GetRegMaskCommon(rs_r8); + if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) { + lir->u.m.use_mask &= ~r8Mask; + lir->u.m.use_mask |= ENCODE_ARM_REG_LR; + } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) { + lir->u.m.def_mask &= ~r8Mask; + lir->u.m.def_mask |= ENCODE_ARM_REG_PC; + } + } + if (flags & REG_DEF_LR) { + lir->u.m.def_mask |= ENCODE_ARM_REG_LR; + } + } +} + +ArmConditionCode Arm64Mir2Lir::ArmConditionEncoding(ConditionCode ccode) { + ArmConditionCode res; + switch (ccode) { + case kCondEq: res = kArmCondEq; break; + case kCondNe: res = kArmCondNe; break; + case kCondCs: res = kArmCondCs; break; + case kCondCc: res = kArmCondCc; break; + case kCondUlt: res = kArmCondCc; break; + case kCondUge: res = kArmCondCs; break; + case kCondMi: res = kArmCondMi; break; + case kCondPl: res = kArmCondPl; break; + case kCondVs: res = kArmCondVs; break; + case kCondVc: res = kArmCondVc; break; + case kCondHi: res = kArmCondHi; break; + case kCondLs: res = kArmCondLs; break; + case kCondGe: res = kArmCondGe; break; + case kCondLt: res = kArmCondLt; break; + case kCondGt: res = kArmCondGt; break; + case kCondLe: res = kArmCondLe; break; + case kCondAl: res = kArmCondAl; break; + case kCondNv: res = kArmCondNv; break; + default: + LOG(FATAL) << "Bad condition code " << ccode; + res = static_cast<ArmConditionCode>(0); // Quiet gcc + } + return res; +} + +static const char* core_reg_names[16] = { + "r0", + "r1", + "r2", + "r3", + "r4", + "r5", + "r6", + "r7", + "r8", + "rSELF", + "r10", + "r11", + "r12", + "sp", + "lr", + "pc", +}; + + +static const char* shift_names[4] = { + "lsl", + "lsr", + "asr", + "ror"}; + +/* Decode and print a ARM register name */ +static char* DecodeRegList(int opcode, int vector, char* buf, size_t buf_size) { + int i; + bool printed = false; + buf[0] = 0; + for (i = 0; i < 16; i++, vector >>= 1) { + if (vector & 0x1) { + int reg_id = i; + if (opcode == kThumbPush && i == 
8) { + reg_id = rs_rARM_LR.GetRegNum(); + } else if (opcode == kThumbPop && i == 8) { + reg_id = rs_rARM_PC.GetRegNum(); + } + if (printed) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), ", r%d", reg_id); + } else { + printed = true; + snprintf(buf, buf_size, "r%d", reg_id); + } + } + } + return buf; +} + +static char* DecodeFPCSRegList(int count, int base, char* buf, size_t buf_size) { + snprintf(buf, buf_size, "s%d", base); + for (int i = 1; i < count; i++) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), ", s%d", base + i); + } + return buf; +} + +static int32_t ExpandImmediate(int value) { + int32_t mode = (value & 0xf00) >> 8; + uint32_t bits = value & 0xff; + switch (mode) { + case 0: + return bits; + case 1: + return (bits << 16) | bits; + case 2: + return (bits << 24) | (bits << 8); + case 3: + return (bits << 24) | (bits << 16) | (bits << 8) | bits; + default: + break; + } + bits = (bits | 0x80) << 24; + return bits >> (((value & 0xf80) >> 7) - 8); +} + +const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"}; +/* + * Interpret a format string and build a string no longer than size + * See format key in Assemble.c. + */ +std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) { + std::string buf; + int i; + const char* fmt_end = &fmt[strlen(fmt)]; + char tbuf[256]; + const char* name; + char nc; + while (fmt < fmt_end) { + int operand; + if (*fmt == '!') { + fmt++; + DCHECK_LT(fmt, fmt_end); + nc = *fmt++; + if (nc == '!') { + strcpy(tbuf, "!"); + } else { + DCHECK_LT(fmt, fmt_end); + DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U); + operand = lir->operands[nc-'0']; + switch (*fmt++) { + case 'H': + if (operand != 0) { + snprintf(tbuf, arraysize(tbuf), ", %s %d", shift_names[operand & 0x3], operand >> 2); + } else { + strcpy(tbuf, ""); + } + break; + case 'B': + switch (operand) { + case kSY: + name = "sy"; + break; + case kST: + name = "st"; + break; + case kISH: + name = "ish"; + break; + case kISHST: + name = "ishst"; + break; + case kNSH: + name = "nsh"; + break; + case kNSHST: + name = "shst"; + break; + default: + name = "DecodeError2"; + break; + } + strcpy(tbuf, name); + break; + case 'b': + strcpy(tbuf, "0000"); + for (i = 3; i >= 0; i--) { + tbuf[i] += operand & 1; + operand >>= 1; + } + break; + case 'n': + operand = ~ExpandImmediate(operand); + snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand); + break; + case 'm': + operand = ExpandImmediate(operand); + snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand); + break; + case 's': + snprintf(tbuf, arraysize(tbuf), "s%d", RegStorage::RegNum(operand)); + break; + case 'S': + snprintf(tbuf, arraysize(tbuf), "d%d", RegStorage::RegNum(operand)); + break; + case 'h': + snprintf(tbuf, arraysize(tbuf), "%04x", operand); + break; + case 'M': + case 'd': + snprintf(tbuf, arraysize(tbuf), "%d", operand); + break; + case 'C': + operand = RegStorage::RegNum(operand); + DCHECK_LT(operand, static_cast<int>( + sizeof(core_reg_names)/sizeof(core_reg_names[0]))); + snprintf(tbuf, arraysize(tbuf), "%s", core_reg_names[operand]); + break; + case 'E': + snprintf(tbuf, arraysize(tbuf), "%d", operand*4); + break; + case 'F': + snprintf(tbuf, arraysize(tbuf), "%d", operand*2); + break; + case 'c': + strcpy(tbuf, cc_names[operand]); + break; + case 't': + snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)", + reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1), + lir->target); + 
break; + case 'u': { + int offset_1 = lir->operands[0]; + int offset_2 = NEXT_LIR(lir)->operands[0]; + uintptr_t target = + (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & + ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) & + 0xfffffffc; + snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void *>(target)); + break; + } + + /* Nothing to print for BLX_2 */ + case 'v': + strcpy(tbuf, "see above"); + break; + case 'R': + DecodeRegList(lir->opcode, operand, tbuf, arraysize(tbuf)); + break; + case 'P': + DecodeFPCSRegList(operand, 16, tbuf, arraysize(tbuf)); + break; + case 'Q': + DecodeFPCSRegList(operand, 0, tbuf, arraysize(tbuf)); + break; + default: + strcpy(tbuf, "DecodeError1"); + break; + } + buf += tbuf; + } + } else { + buf += *fmt++; + } + } + return buf; +} + +void Arm64Mir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) { + char buf[256]; + buf[0] = 0; + + if (mask == ENCODE_ALL) { + strcpy(buf, "all"); + } else { + char num[8]; + int i; + + for (i = 0; i < kArmRegEnd; i++) { + if (mask & (1ULL << i)) { + snprintf(num, arraysize(num), "%d ", i); + strcat(buf, num); + } + } + + if (mask & ENCODE_CCODE) { + strcat(buf, "cc "); + } + if (mask & ENCODE_FP_STATUS) { + strcat(buf, "fpcc "); + } + + /* Memory bits */ + if (arm_lir && (mask & ENCODE_DALVIK_REG)) { + snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s", + DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info), + DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : ""); + } + if (mask & ENCODE_LITERAL) { + strcat(buf, "lit "); + } + + if (mask & ENCODE_HEAP_REF) { + strcat(buf, "heap "); + } + if (mask & ENCODE_MUST_NOT_ALIAS) { + strcat(buf, "noalias "); + } + } + if (buf[0]) { + LOG(INFO) << prefix << ": " << buf; + } +} + +bool Arm64Mir2Lir::IsUnconditionalBranch(LIR* lir) { + return ((lir->opcode == kThumbBUncond) || (lir->opcode == kThumb2BUncond)); +} + +Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) + : Mir2Lir(cu, mir_graph, arena) { + // Sanity check - make sure encoding map lines up. + for (int i = 0; i < kArmLast; i++) { + if (Arm64Mir2Lir::EncodingMap[i].opcode != i) { + LOG(FATAL) << "Encoding order for " << Arm64Mir2Lir::EncodingMap[i].name + << " is wrong: expecting " << i << ", seeing " + << static_cast<int>(Arm64Mir2Lir::EncodingMap[i].opcode); + } + } +} + +Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, + ArenaAllocator* const arena) { + return new Arm64Mir2Lir(cu, mir_graph, arena); +} + +// Alloc a pair of core registers, or a double. +RegStorage Arm64Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) { + if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { + return AllocTempDouble(); + } else { + RegStorage low_reg = AllocTemp(); + RegStorage high_reg = AllocTemp(); + return RegStorage::MakeRegPair(low_reg, high_reg); + } +} + +RegStorage Arm64Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) { + if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) + return AllocTempSingle(); + return AllocTemp(); +} + +void Arm64Mir2Lir::CompilerInitializeRegAlloc() { + reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs, + core_temps, sp_temps, dp_temps); + + // Target-specific adjustments. + + // Alias single precision floats to appropriate half of overlapping double. 
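+  // (Example of the aliasing established below: s4 and s5 both get d2 as their master;
+  //  s4 keeps storage mask 0x1 (low half) while s5, being odd, is remapped to mask 0x2.)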
+ GrowableArray<RegisterInfo*>::Iterator it(®_pool_->sp_regs_); + for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + int sp_reg_num = info->GetReg().GetRegNum(); + int dp_reg_num = sp_reg_num >> 1; + RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num); + RegisterInfo* dp_reg_info = GetRegInfo(dp_reg); + // Double precision register's master storage should refer to itself. + DCHECK_EQ(dp_reg_info, dp_reg_info->Master()); + // Redirect single precision's master storage to master. + info->SetMaster(dp_reg_info); + // Singles should show a single 32-bit mask bit, at first referring to the low half. + DCHECK_EQ(info->StorageMask(), 0x1U); + if (sp_reg_num & 1) { + // For odd singles, change to user the high word of the backing double. + info->SetStorageMask(0x2); + } + } + + // TODO: re-enable this when we can safely save r4 over the suspension code path. + bool no_suspend = NO_SUSPEND; // || !Runtime::Current()->ExplicitSuspendChecks(); + if (no_suspend) { + GetRegInfo(rs_rARM_SUSPEND)->MarkFree(); + } + + // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods. + // TODO: adjust when we roll to hard float calling convention. + reg_pool_->next_core_reg_ = 2; + reg_pool_->next_sp_reg_ = 0; + reg_pool_->next_dp_reg_ = 0; +} + +void Arm64Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) { + DCHECK(rl_keep.wide); + DCHECK(rl_free.wide); + if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) && + (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) && + (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) && + (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) { + // No overlap, free. + FreeTemp(rl_free.reg); + } +} + +/* + * TUNING: is true leaf? Can't just use METHOD_IS_LEAF to determine as some + * instructions might call out to C/assembly helper functions. Until + * machinery is in place, always spill lr. + */ + +void Arm64Mir2Lir::AdjustSpillMask() { + core_spill_mask_ |= (1 << rs_rARM_LR.GetRegNum()); + num_core_spills_++; +} + +/* + * Mark a callee-save fp register as promoted. Note that + * vpush/vpop uses contiguous register lists so we must + * include any holes in the mask. Associate holes with + * Dalvik register INVALID_VREG (0xFFFFU). + */ +void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) { + DCHECK_GE(reg.GetRegNum(), ARM_FP_CALLEE_SAVE_BASE); + int adjusted_reg_num = reg.GetRegNum() - ARM_FP_CALLEE_SAVE_BASE; + // Ensure fp_vmap_table is large enough + int table_size = fp_vmap_table_.size(); + for (int i = table_size; i < (adjusted_reg_num + 1); i++) { + fp_vmap_table_.push_back(INVALID_VREG); + } + // Add the current mapping + fp_vmap_table_[adjusted_reg_num] = v_reg; + // Size of fp_vmap_table is high-water mark, use to set mask + num_fp_spills_ = fp_vmap_table_.size(); + fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << ARM_FP_CALLEE_SAVE_BASE; +} + +void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) { + // TEMP: perform as 2 singles. + int reg_num = reg.GetRegNum() << 1; + RegStorage lo = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num); + RegStorage hi = RegStorage::Solo32(RegStorage::kFloatingPoint | reg_num | 1); + MarkPreservedSingle(v_reg, lo); + MarkPreservedSingle(v_reg + 1, hi); +} + +/* Clobber all regs that might be used by an external C call */ +void Arm64Mir2Lir::ClobberCallerSave() { + // TODO: rework this - it's gotten even more ugly. 
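+  // (The list below is simply the ARM caller-save set used here: r0-r3, r12, lr and
+  //  s0-s15 with their d0-d7 aliases.)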
+ Clobber(rs_r0); + Clobber(rs_r1); + Clobber(rs_r2); + Clobber(rs_r3); + Clobber(rs_r12); + Clobber(rs_r14lr); + Clobber(rs_fr0); + Clobber(rs_fr1); + Clobber(rs_fr2); + Clobber(rs_fr3); + Clobber(rs_fr4); + Clobber(rs_fr5); + Clobber(rs_fr6); + Clobber(rs_fr7); + Clobber(rs_fr8); + Clobber(rs_fr9); + Clobber(rs_fr10); + Clobber(rs_fr11); + Clobber(rs_fr12); + Clobber(rs_fr13); + Clobber(rs_fr14); + Clobber(rs_fr15); + Clobber(rs_dr0); + Clobber(rs_dr1); + Clobber(rs_dr2); + Clobber(rs_dr3); + Clobber(rs_dr4); + Clobber(rs_dr5); + Clobber(rs_dr6); + Clobber(rs_dr7); +} + +RegLocation Arm64Mir2Lir::GetReturnWideAlt() { + RegLocation res = LocCReturnWide(); + res.reg.SetLowReg(rs_r2.GetReg()); + res.reg.SetHighReg(rs_r3.GetReg()); + Clobber(rs_r2); + Clobber(rs_r3); + MarkInUse(rs_r2); + MarkInUse(rs_r3); + MarkWide(res.reg); + return res; +} + +RegLocation Arm64Mir2Lir::GetReturnAlt() { + RegLocation res = LocCReturn(); + res.reg.SetReg(rs_r1.GetReg()); + Clobber(rs_r1); + MarkInUse(rs_r1); + return res; +} + +/* To be used when explicitly managing register use */ +void Arm64Mir2Lir::LockCallTemps() { + LockTemp(rs_r0); + LockTemp(rs_r1); + LockTemp(rs_r2); + LockTemp(rs_r3); +} + +/* To be used when explicitly managing register use */ +void Arm64Mir2Lir::FreeCallTemps() { + FreeTemp(rs_r0); + FreeTemp(rs_r1); + FreeTemp(rs_r2); + FreeTemp(rs_r3); +} + +RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<4> offset) { + LoadWordDisp(rs_rARM_SELF, offset.Int32Value(), rs_rARM_LR); + return rs_rARM_LR; +} + +LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() { + RegStorage tmp = rs_r0; + Load32Disp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp); + LIR* load2 = Load32Disp(tmp, 0, tmp); + return load2; +} + +uint64_t Arm64Mir2Lir::GetTargetInstFlags(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); + return Arm64Mir2Lir::EncodingMap[opcode].flags; +} + +const char* Arm64Mir2Lir::GetTargetInstName(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); + return Arm64Mir2Lir::EncodingMap[opcode].name; +} + +const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); + return Arm64Mir2Lir::EncodingMap[opcode].fmt; +} + +/* + * Somewhat messy code here. We want to allocate a pair of contiguous + * physical single-precision floating point registers starting with + * an even numbered reg. It is possible that the paired s_reg (s_reg+1) + * has already been allocated - try to fit if possible. Fail to + * allocate if we can't meet the requirements for the pair of + * s_reg<=sX[even] & (s_reg+1)<= sX+1. + */ +// TODO: needs rewrite to support non-backed 64-bit float regs. +RegStorage Arm64Mir2Lir::AllocPreservedDouble(int s_reg) { + RegStorage res; + int v_reg = mir_graph_->SRegToVReg(s_reg); + int p_map_idx = SRegToPMap(s_reg); + if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) { + // Upper reg is already allocated. Can we fit? + int high_reg = promotion_map_[p_map_idx+1].FpReg; + if ((high_reg & 1) == 0) { + // High reg is even - fail. + return res; // Invalid. + } + // Is the low reg of the pair free? + // FIXME: rework. + RegisterInfo* p = GetRegInfo(RegStorage::FloatSolo32(high_reg - 1)); + if (p->InUse() || p->IsTemp()) { + // Already allocated or not preserved - fail. + return res; // Invalid. + } + // OK - good to go. 
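+    // (Worked example, numbers assumed: if the upper half was already promoted to s21,
+    //  high_reg = 21 is odd, its even partner s20 is checked above, and the pair is
+    //  claimed below as d10.)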
+ res = RegStorage::FloatSolo64(p->GetReg().GetRegNum() >> 1); + p->MarkInUse(); + MarkPreservedSingle(v_reg, p->GetReg()); + } else { + /* + * TODO: until runtime support is in, make sure we avoid promoting the same vreg to + * different underlying physical registers. + */ + GrowableArray<RegisterInfo*>::Iterator it(®_pool_->dp_regs_); + for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { + if (!info->IsTemp() && !info->InUse()) { + res = info->GetReg(); + info->MarkInUse(); + MarkPreservedDouble(v_reg, info->GetReg()); + break; + } + } + } + if (res.Valid()) { + promotion_map_[p_map_idx].fp_location = kLocPhysReg; + promotion_map_[p_map_idx].FpReg = res.DoubleToLowSingle().GetReg(); + promotion_map_[p_map_idx+1].fp_location = kLocPhysReg; + promotion_map_[p_map_idx+1].FpReg = res.DoubleToHighSingle().GetReg(); + } + return res; +} + +} // namespace art diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc new file mode 100644 index 000000000..d66b83413 --- /dev/null +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -0,0 +1,1149 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm64_lir.h" +#include "codegen_arm64.h" +#include "dex/quick/mir_to_lir-inl.h" + +namespace art { + +/* This file contains codegen for the Thumb ISA. */ + +static int32_t EncodeImmSingle(int32_t value) { + int32_t res; + int32_t bit_a = (value & 0x80000000) >> 31; + int32_t not_bit_b = (value & 0x40000000) >> 30; + int32_t bit_b = (value & 0x20000000) >> 29; + int32_t b_smear = (value & 0x3e000000) >> 25; + int32_t slice = (value & 0x01f80000) >> 19; + int32_t zeroes = (value & 0x0007ffff); + if (zeroes != 0) + return -1; + if (bit_b) { + if ((not_bit_b != 0) || (b_smear != 0x1f)) + return -1; + } else { + if ((not_bit_b != 1) || (b_smear != 0x0)) + return -1; + } + res = (bit_a << 7) | (bit_b << 6) | slice; + return res; +} + +/* + * Determine whether value can be encoded as a Thumb2 floating point + * immediate. If not, return -1. If so return encoded 8-bit value. + */ +static int32_t EncodeImmDouble(int64_t value) { + int32_t res; + int32_t bit_a = (value & INT64_C(0x8000000000000000)) >> 63; + int32_t not_bit_b = (value & INT64_C(0x4000000000000000)) >> 62; + int32_t bit_b = (value & INT64_C(0x2000000000000000)) >> 61; + int32_t b_smear = (value & INT64_C(0x3fc0000000000000)) >> 54; + int32_t slice = (value & INT64_C(0x003f000000000000)) >> 48; + uint64_t zeroes = (value & INT64_C(0x0000ffffffffffff)); + if (zeroes != 0ull) + return -1; + if (bit_b) { + if ((not_bit_b != 0) || (b_smear != 0xff)) + return -1; + } else { + if ((not_bit_b != 1) || (b_smear != 0x0)) + return -1; + } + res = (bit_a << 7) | (bit_b << 6) | slice; + return res; +} + +LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int value) { + DCHECK(RegStorage::IsSingle(r_dest)); + if (value == 0) { + // TODO: we need better info about the target CPU. 
a vector exclusive or + // would probably be better here if we could rely on its existance. + // Load an immediate +2.0 (which encodes to 0) + NewLIR2(kThumb2Vmovs_IMM8, r_dest, 0); + // +0.0 = +2.0 - +2.0 + return NewLIR3(kThumb2Vsubs, r_dest, r_dest, r_dest); + } else { + int encoded_imm = EncodeImmSingle(value); + if (encoded_imm >= 0) { + return NewLIR2(kThumb2Vmovs_IMM8, r_dest, encoded_imm); + } + } + LIR* data_target = ScanLiteralPool(literal_list_, value, 0); + if (data_target == NULL) { + data_target = AddWordData(&literal_list_, value); + } + LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs, + r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target); + SetMemRefType(load_pc_rel, true, kLiteral); + AppendLIR(load_pc_rel); + return load_pc_rel; +} + +static int LeadingZeros(uint32_t val) { + uint32_t alt; + int32_t n; + int32_t count; + + count = 16; + n = 32; + do { + alt = val >> count; + if (alt != 0) { + n = n - count; + val = alt; + } + count >>= 1; + } while (count); + return n - val; +} + +/* + * Determine whether value can be encoded as a Thumb2 modified + * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form. + */ +int Arm64Mir2Lir::ModifiedImmediate(uint32_t value) { + int32_t z_leading; + int32_t z_trailing; + uint32_t b0 = value & 0xff; + + /* Note: case of value==0 must use 0:000:0:0000000 encoding */ + if (value <= 0xFF) + return b0; // 0:000:a:bcdefgh + if (value == ((b0 << 16) | b0)) + return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */ + if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0)) + return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */ + b0 = (value >> 8) & 0xff; + if (value == ((b0 << 24) | (b0 << 8))) + return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */ + /* Can we do it with rotation? */ + z_leading = LeadingZeros(value); + z_trailing = 32 - LeadingZeros(~value & (value - 1)); + /* A run of eight or fewer active bits? */ + if ((z_leading + z_trailing) < 24) + return -1; /* No - bail */ + /* left-justify the constant, discarding msb (known to be 1) */ + value <<= z_leading + 1; + /* Create bcdefgh */ + value >>= 25; + /* Put it all together */ + return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */ +} + +bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) { + return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0); +} + +bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) { + return EncodeImmSingle(value) >= 0; +} + +bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) { + return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value)); +} + +bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) { + return EncodeImmDouble(value) >= 0; +} + +/* + * Load a immediate using a shortcut if possible; otherwise + * grab from the per-translation literal pool. + * + * No additional register clobbering operation performed. 
Use this version when + * 1) r_dest is freshly returned from AllocTemp or + * 2) The codegen is under fixed register usage + */ +LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { + LIR* res; + int mod_imm; + + if (r_dest.IsFloat()) { + return LoadFPConstantValue(r_dest.GetReg(), value); + } + + /* See if the value can be constructed cheaply */ + if (r_dest.Low8() && (value >= 0) && (value <= 255)) { + return NewLIR2(kThumbMovImm, r_dest.GetReg(), value); + } + /* Check Modified immediate special cases */ + mod_imm = ModifiedImmediate(value); + if (mod_imm >= 0) { + res = NewLIR2(kThumb2MovI8M, r_dest.GetReg(), mod_imm); + return res; + } + mod_imm = ModifiedImmediate(~value); + if (mod_imm >= 0) { + res = NewLIR2(kThumb2MvnI8M, r_dest.GetReg(), mod_imm); + return res; + } + /* 16-bit immediate? */ + if ((value & 0xffff) == value) { + res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), value); + return res; + } + /* Do a low/high pair */ + res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), Low16Bits(value)); + NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), High16Bits(value)); + return res; +} + +LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) { + LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched during assembly */); + res->target = target; + return res; +} + +LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) { + // This is kThumb2BCond instead of kThumbBCond for performance reasons. The assembly + // time required for a new pass after kThumbBCond is fixed up to kThumb2BCond is + // substantial. + LIR* branch = NewLIR2(kThumb2BCond, 0 /* offset to be patched */, + ArmConditionEncoding(cc)); + branch->target = target; + return branch; +} + +LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { + ArmOpcode opcode = kThumbBkpt; + switch (op) { + case kOpBlx: + opcode = kThumbBlxR; + break; + case kOpBx: + opcode = kThumbBx; + break; + default: + LOG(FATAL) << "Bad opcode " << op; + } + return NewLIR1(opcode, r_dest_src.GetReg()); +} + +LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, + int shift) { + bool thumb_form = + ((shift == 0) && r_dest_src1.Low8() && r_src2.Low8()); + ArmOpcode opcode = kThumbBkpt; + switch (op) { + case kOpAdc: + opcode = (thumb_form) ? kThumbAdcRR : kThumb2AdcRRR; + break; + case kOpAnd: + opcode = (thumb_form) ? kThumbAndRR : kThumb2AndRRR; + break; + case kOpBic: + opcode = (thumb_form) ? kThumbBicRR : kThumb2BicRRR; + break; + case kOpCmn: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbCmnRR : kThumb2CmnRR; + break; + case kOpCmp: + if (thumb_form) + opcode = kThumbCmpRR; + else if ((shift == 0) && !r_dest_src1.Low8() && !r_src2.Low8()) + opcode = kThumbCmpHH; + else if ((shift == 0) && r_dest_src1.Low8()) + opcode = kThumbCmpLH; + else if (shift == 0) + opcode = kThumbCmpHL; + else + opcode = kThumb2CmpRR; + break; + case kOpXor: + opcode = (thumb_form) ? kThumbEorRR : kThumb2EorRRR; + break; + case kOpMov: + DCHECK_EQ(shift, 0); + if (r_dest_src1.Low8() && r_src2.Low8()) + opcode = kThumbMovRR; + else if (!r_dest_src1.Low8() && !r_src2.Low8()) + opcode = kThumbMovRR_H2H; + else if (r_dest_src1.Low8()) + opcode = kThumbMovRR_H2L; + else + opcode = kThumbMovRR_L2H; + break; + case kOpMul: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbMul : kThumb2MulRRR; + break; + case kOpMvn: + opcode = (thumb_form) ? kThumbMvn : kThumb2MnvRR; + break; + case kOpNeg: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? 
kThumbNeg : kThumb2NegRR; + break; + case kOpOr: + opcode = (thumb_form) ? kThumbOrr : kThumb2OrrRRR; + break; + case kOpSbc: + opcode = (thumb_form) ? kThumbSbc : kThumb2SbcRRR; + break; + case kOpTst: + opcode = (thumb_form) ? kThumbTst : kThumb2TstRR; + break; + case kOpLsl: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbLslRR : kThumb2LslRRR; + break; + case kOpLsr: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbLsrRR : kThumb2LsrRRR; + break; + case kOpAsr: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbAsrRR : kThumb2AsrRRR; + break; + case kOpRor: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbRorRR : kThumb2RorRRR; + break; + case kOpAdd: + opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR; + break; + case kOpSub: + opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR; + break; + case kOpRev: + DCHECK_EQ(shift, 0); + if (!thumb_form) { + // Binary, but rm is encoded twice. + return NewLIR3(kThumb2RevRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg()); + } + opcode = kThumbRev; + break; + case kOpRevsh: + DCHECK_EQ(shift, 0); + if (!thumb_form) { + // Binary, but rm is encoded twice. + return NewLIR3(kThumb2RevshRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg()); + } + opcode = kThumbRevsh; + break; + case kOp2Byte: + DCHECK_EQ(shift, 0); + return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 8); + case kOp2Short: + DCHECK_EQ(shift, 0); + return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16); + case kOp2Char: + DCHECK_EQ(shift, 0); + return NewLIR4(kThumb2Ubfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16); + default: + LOG(FATAL) << "Bad opcode: " << op; + break; + } + DCHECK(!IsPseudoLirOp(opcode)); + if (EncodingMap[opcode].flags & IS_BINARY_OP) { + return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg()); + } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { + if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) { + return NewLIR3(opcode, r_dest_src1.GetReg(), r_src2.GetReg(), shift); + } else { + return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg()); + } + } else if (EncodingMap[opcode].flags & IS_QUAD_OP) { + return NewLIR4(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift); + } else { + LOG(FATAL) << "Unexpected encoding operand count"; + return NULL; + } +} + +LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) { + return OpRegRegShift(op, r_dest_src1, r_src2, 0); +} + +LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) { + UNIMPLEMENTED(FATAL); + return nullptr; +} + +LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) { + UNIMPLEMENTED(FATAL); + return nullptr; +} + +LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) { + LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm"; + return NULL; +} + +LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, + RegStorage r_src2, int shift) { + ArmOpcode opcode = kThumbBkpt; + bool thumb_form = (shift == 0) && r_dest.Low8() && r_src1.Low8() && r_src2.Low8(); + switch (op) { + case kOpAdd: + opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR; + break; + case kOpSub: + opcode = (thumb_form) ? 
kThumbSubRRR : kThumb2SubRRR; + break; + case kOpRsub: + opcode = kThumb2RsubRRR; + break; + case kOpAdc: + opcode = kThumb2AdcRRR; + break; + case kOpAnd: + opcode = kThumb2AndRRR; + break; + case kOpBic: + opcode = kThumb2BicRRR; + break; + case kOpXor: + opcode = kThumb2EorRRR; + break; + case kOpMul: + DCHECK_EQ(shift, 0); + opcode = kThumb2MulRRR; + break; + case kOpDiv: + DCHECK_EQ(shift, 0); + opcode = kThumb2SdivRRR; + break; + case kOpOr: + opcode = kThumb2OrrRRR; + break; + case kOpSbc: + opcode = kThumb2SbcRRR; + break; + case kOpLsl: + DCHECK_EQ(shift, 0); + opcode = kThumb2LslRRR; + break; + case kOpLsr: + DCHECK_EQ(shift, 0); + opcode = kThumb2LsrRRR; + break; + case kOpAsr: + DCHECK_EQ(shift, 0); + opcode = kThumb2AsrRRR; + break; + case kOpRor: + DCHECK_EQ(shift, 0); + opcode = kThumb2RorRRR; + break; + default: + LOG(FATAL) << "Bad opcode: " << op; + break; + } + DCHECK(!IsPseudoLirOp(opcode)); + if (EncodingMap[opcode].flags & IS_QUAD_OP) { + return NewLIR4(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift); + } else { + DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP); + return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg()); + } +} + +LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) { + return OpRegRegRegShift(op, r_dest, r_src1, r_src2, 0); +} + +LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) { + LIR* res; + bool neg = (value < 0); + int32_t abs_value = (neg) ? -value : value; + ArmOpcode opcode = kThumbBkpt; + ArmOpcode alt_opcode = kThumbBkpt; + bool all_low_regs = r_dest.Low8() && r_src1.Low8(); + int32_t mod_imm = ModifiedImmediate(value); + + switch (op) { + case kOpLsl: + if (all_low_regs) + return NewLIR3(kThumbLslRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + else + return NewLIR3(kThumb2LslRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + case kOpLsr: + if (all_low_regs) + return NewLIR3(kThumbLsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + else + return NewLIR3(kThumb2LsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + case kOpAsr: + if (all_low_regs) + return NewLIR3(kThumbAsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + else + return NewLIR3(kThumb2AsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + case kOpRor: + return NewLIR3(kThumb2RorRRI5, r_dest.GetReg(), r_src1.GetReg(), value); + case kOpAdd: + if (r_dest.Low8() && (r_src1 == rs_r13sp) && (value <= 1020) && ((value & 0x3) == 0)) { + return NewLIR3(kThumbAddSpRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2); + } else if (r_dest.Low8() && (r_src1 == rs_r15pc) && + (value <= 1020) && ((value & 0x3) == 0)) { + return NewLIR3(kThumbAddPcRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2); + } + // Note: intentional fallthrough + case kOpSub: + if (all_low_regs && ((abs_value & 0x7) == abs_value)) { + if (op == kOpAdd) + opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3; + else + opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3; + return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value); + } + if (mod_imm < 0) { + mod_imm = ModifiedImmediate(-value); + if (mod_imm >= 0) { + op = (op == kOpAdd) ? kOpSub : kOpAdd; + } + } + if (mod_imm < 0 && (abs_value & 0x3ff) == abs_value) { + // This is deliberately used only if modified immediate encoding is inadequate since + // we sometimes actually use the flags for small values but not necessarily low regs. + if (op == kOpAdd) + opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12; + else + opcode = (neg) ? 
kThumb2AddRRI12 : kThumb2SubRRI12; + return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value); + } + if (op == kOpSub) { + opcode = kThumb2SubRRI8M; + alt_opcode = kThumb2SubRRR; + } else { + opcode = kThumb2AddRRI8M; + alt_opcode = kThumb2AddRRR; + } + break; + case kOpRsub: + opcode = kThumb2RsubRRI8M; + alt_opcode = kThumb2RsubRRR; + break; + case kOpAdc: + opcode = kThumb2AdcRRI8M; + alt_opcode = kThumb2AdcRRR; + break; + case kOpSbc: + opcode = kThumb2SbcRRI8M; + alt_opcode = kThumb2SbcRRR; + break; + case kOpOr: + opcode = kThumb2OrrRRI8M; + alt_opcode = kThumb2OrrRRR; + break; + case kOpAnd: + if (mod_imm < 0) { + mod_imm = ModifiedImmediate(~value); + if (mod_imm >= 0) { + return NewLIR3(kThumb2BicRRI8M, r_dest.GetReg(), r_src1.GetReg(), mod_imm); + } + } + opcode = kThumb2AndRRI8M; + alt_opcode = kThumb2AndRRR; + break; + case kOpXor: + opcode = kThumb2EorRRI8M; + alt_opcode = kThumb2EorRRR; + break; + case kOpMul: + // TUNING: power of 2, shift & add + mod_imm = -1; + alt_opcode = kThumb2MulRRR; + break; + case kOpCmp: { + LIR* res; + if (mod_imm >= 0) { + res = NewLIR2(kThumb2CmpRI8M, r_src1.GetReg(), mod_imm); + } else { + mod_imm = ModifiedImmediate(-value); + if (mod_imm >= 0) { + res = NewLIR2(kThumb2CmnRI8M, r_src1.GetReg(), mod_imm); + } else { + RegStorage r_tmp = AllocTemp(); + res = LoadConstant(r_tmp, value); + OpRegReg(kOpCmp, r_src1, r_tmp); + FreeTemp(r_tmp); + } + } + return res; + } + default: + LOG(FATAL) << "Bad opcode: " << op; + } + + if (mod_imm >= 0) { + return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), mod_imm); + } else { + RegStorage r_scratch = AllocTemp(); + LoadConstant(r_scratch, value); + if (EncodingMap[alt_opcode].flags & IS_QUAD_OP) + res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0); + else + res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg()); + FreeTemp(r_scratch); + return res; + } +} + +/* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */ +LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { + bool neg = (value < 0); + int32_t abs_value = (neg) ? -value : value; + bool short_form = (((abs_value & 0xff) == abs_value) && r_dest_src1.Low8()); + ArmOpcode opcode = kThumbBkpt; + switch (op) { + case kOpAdd: + if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */ + DCHECK_EQ((value & 0x3), 0); + return NewLIR1(kThumbAddSpI7, value >> 2); + } else if (short_form) { + opcode = (neg) ? kThumbSubRI8 : kThumbAddRI8; + } + break; + case kOpSub: + if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */ + DCHECK_EQ((value & 0x3), 0); + return NewLIR1(kThumbSubSpI7, value >> 2); + } else if (short_form) { + opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8; + } + break; + case kOpCmp: + if (!neg && short_form) { + opcode = kThumbCmpRI8; + } else { + short_form = false; + } + break; + default: + /* Punt to OpRegRegImm - if bad case catch it there */ + short_form = false; + break; + } + if (short_form) { + return NewLIR2(opcode, r_dest_src1.GetReg(), abs_value); + } else { + return OpRegRegImm(op, r_dest_src1, r_dest_src1, value); + } +} + +LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { + LIR* res = NULL; + int32_t val_lo = Low32Bits(value); + int32_t val_hi = High32Bits(value); + if (r_dest.IsFloat()) { + DCHECK(!r_dest.IsPair()); + if ((val_lo == 0) && (val_hi == 0)) { + // TODO: we need better info about the target CPU. 
a vector exclusive or + // would probably be better here if we could rely on its existence. + // Load an immediate +2.0 (which encodes to 0) + NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), 0); + // +0.0 = +2.0 - +2.0 + res = NewLIR3(kThumb2Vsubd, r_dest.GetReg(), r_dest.GetReg(), r_dest.GetReg()); + } else { + int encoded_imm = EncodeImmDouble(value); + if (encoded_imm >= 0) { + res = NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), encoded_imm); + } + } + } else { + // NOTE: Arm32 assumption here. + DCHECK(r_dest.IsPair()); + if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) { + res = LoadConstantNoClobber(r_dest.GetLow(), val_lo); + LoadConstantNoClobber(r_dest.GetHigh(), val_hi); + } + } + if (res == NULL) { + // No short form - load from the literal pool. + LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); + if (data_target == NULL) { + data_target = AddWideData(&literal_list_, val_lo, val_hi); + } + if (r_dest.IsFloat()) { + res = RawLIR(current_dalvik_offset_, kThumb2Vldrd, + r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target); + } else { + DCHECK(r_dest.IsPair()); + res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8, + r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target); + } + SetMemRefType(res, true, kLiteral); + AppendLIR(res); + } + return res; +} + +int Arm64Mir2Lir::EncodeShift(int code, int amount) { + return ((amount & 0x1f) << 2) | code; +} + +LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, + int scale, OpSize size) { + bool all_low_regs = r_base.Low8() && r_index.Low8() && r_dest.Low8(); + LIR* load; + ArmOpcode opcode = kThumbBkpt; + bool thumb_form = (all_low_regs && (scale == 0)); + RegStorage reg_ptr; + + if (r_dest.IsFloat()) { + if (r_dest.IsSingle()) { + DCHECK((size == k32) || (size == kSingle) || (size == kReference)); + opcode = kThumb2Vldrs; + size = kSingle; + } else { + DCHECK(r_dest.IsDouble()); + DCHECK((size == k64) || (size == kDouble)); + opcode = kThumb2Vldrd; + size = kDouble; + } + } else { + if (size == kSingle) + size = k32; + } + + switch (size) { + case kDouble: // fall-through + // Intentional fall-through. + case kSingle: + reg_ptr = AllocTemp(); + if (scale) { + NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(), + EncodeShift(kArmLsl, scale)); + } else { + OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index); + } + load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0); + FreeTemp(reg_ptr); + return load; + case k32: + // Intentional fall-through. + case kReference: + opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR; + break; + case kUnsignedHalf: + opcode = (thumb_form) ? kThumbLdrhRRR : kThumb2LdrhRRR; + break; + case kSignedHalf: + opcode = (thumb_form) ? kThumbLdrshRRR : kThumb2LdrshRRR; + break; + case kUnsignedByte: + opcode = (thumb_form) ? kThumbLdrbRRR : kThumb2LdrbRRR; + break; + case kSignedByte: + opcode = (thumb_form) ?
kThumbLdrsbRRR : kThumb2LdrsbRRR; + break; + default: + LOG(FATAL) << "Bad size: " << size; + } + if (thumb_form) + load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg()); + else + load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + + return load; +} + +LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, + int scale, OpSize size) { + bool all_low_regs = r_base.Low8() && r_index.Low8() && r_src.Low8(); + LIR* store = NULL; + ArmOpcode opcode = kThumbBkpt; + bool thumb_form = (all_low_regs && (scale == 0)); + RegStorage reg_ptr; + + if (r_src.IsFloat()) { + if (r_src.IsSingle()) { + DCHECK((size == k32) || (size == kSingle) || (size == kReference)); + opcode = kThumb2Vstrs; + size = kSingle; + } else { + DCHECK(r_src.IsDouble()); + DCHECK((size == k64) || (size == kDouble)); + DCHECK_EQ((r_src.GetReg() & 0x1), 0); + opcode = kThumb2Vstrd; + size = kDouble; + } + } else { + if (size == kSingle) + size = k32; + } + + switch (size) { + case kDouble: // fall-through + // Intentional fall-though. + case kSingle: + reg_ptr = AllocTemp(); + if (scale) { + NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(), + EncodeShift(kArmLsl, scale)); + } else { + OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index); + } + store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0); + FreeTemp(reg_ptr); + return store; + case k32: + // Intentional fall-though. + case kReference: + opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR; + break; + case kUnsignedHalf: + // Intentional fall-though. + case kSignedHalf: + opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR; + break; + case kUnsignedByte: + // Intentional fall-though. + case kSignedByte: + opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR; + break; + default: + LOG(FATAL) << "Bad size: " << size; + } + if (thumb_form) + store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg()); + else + store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + + return store; +} + +/* + * Load value from base + displacement. Optionally perform null check + * on base (which must have an associated s_reg and MIR). If not + * performing null check, incoming MIR can be null. + */ +LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, + OpSize size, int s_reg) { + LIR* load = NULL; + ArmOpcode opcode = kThumbBkpt; + bool short_form = false; + bool thumb2Form = (displacement < 4092 && displacement >= 0); + bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8(); + int encoded_disp = displacement; + bool already_generated = false; + bool null_pointer_safepoint = false; + switch (size) { + case kDouble: + // Intentional fall-though. + case k64: + if (r_dest.IsFloat()) { + DCHECK(!r_dest.IsPair()); + opcode = kThumb2Vldrd; + if (displacement <= 1020) { + short_form = true; + encoded_disp >>= 2; + } + } else { + if (displacement <= 1020) { + load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(), + displacement >> 2); + } else { + load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32, s_reg); + null_pointer_safepoint = true; + LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32, INVALID_SREG); + } + already_generated = true; + } + break; + case kSingle: + // Intentional fall-though. + case k32: + // Intentional fall-though. 
+ case kReference: + if (r_dest.IsFloat()) { + opcode = kThumb2Vldrs; + if (displacement <= 1020) { + short_form = true; + encoded_disp >>= 2; + } + break; + } + if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) && + (displacement >= 0)) { + short_form = true; + encoded_disp >>= 2; + opcode = kThumbLdrPcRel; + } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) && + (displacement >= 0)) { + short_form = true; + encoded_disp >>= 2; + opcode = kThumbLdrSpRel; + } else if (all_low && displacement < 128 && displacement >= 0) { + DCHECK_EQ((displacement & 0x3), 0); + short_form = true; + encoded_disp >>= 2; + opcode = kThumbLdrRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2LdrRRI12; + } + break; + case kUnsignedHalf: + if (all_low && displacement < 64 && displacement >= 0) { + DCHECK_EQ((displacement & 0x1), 0); + short_form = true; + encoded_disp >>= 1; + opcode = kThumbLdrhRRI5; + } else if (displacement < 4092 && displacement >= 0) { + short_form = true; + opcode = kThumb2LdrhRRI12; + } + break; + case kSignedHalf: + if (thumb2Form) { + short_form = true; + opcode = kThumb2LdrshRRI12; + } + break; + case kUnsignedByte: + if (all_low && displacement < 32 && displacement >= 0) { + short_form = true; + opcode = kThumbLdrbRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2LdrbRRI12; + } + break; + case kSignedByte: + if (thumb2Form) { + short_form = true; + opcode = kThumb2LdrsbRRI12; + } + break; + default: + LOG(FATAL) << "Bad size: " << size; + } + + if (!already_generated) { + if (short_form) { + load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp); + } else { + RegStorage reg_offset = AllocTemp(); + LoadConstant(reg_offset, encoded_disp); + if (r_dest.IsFloat()) { + // No index ops - must use a long sequence. Turn the offset into a direct pointer. + OpRegReg(kOpAdd, reg_offset, r_base); + load = LoadBaseDispBody(reg_offset, 0, r_dest, size, s_reg); + } else { + load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size); + } + FreeTemp(reg_offset); + } + } + + // TODO: in future may need to differentiate Dalvik accesses w/ spills + if (r_base == rs_rARM_SP) { + AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit()); + } else { + // We might need to generate a safepoint if we have two store instructions (wide or double). + if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) { + MarkSafepointPC(load); + } + } + return load; +} + +LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size, + int s_reg) { + DCHECK(!((size == k64) || (size == kDouble))); + // TODO: base this on target. 
+ if (size == kWord) { + size = k32; + } + return LoadBaseDispBody(r_base, displacement, r_dest, size, s_reg); +} + +LIR* Arm64Mir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest, + int s_reg) { + return LoadBaseDispBody(r_base, displacement, r_dest, k64, s_reg); +} + + +LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, + OpSize size) { + LIR* store = NULL; + ArmOpcode opcode = kThumbBkpt; + bool short_form = false; + bool thumb2Form = (displacement < 4092 && displacement >= 0); + bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8(); + int encoded_disp = displacement; + bool already_generated = false; + bool null_pointer_safepoint = false; + switch (size) { + case k64: + case kDouble: + if (!r_src.IsFloat()) { + if (displacement <= 1020) { + store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(), + displacement >> 2); + } else { + store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32); + null_pointer_safepoint = true; + StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32); + } + already_generated = true; + } else { + DCHECK(!r_src.IsPair()); + opcode = kThumb2Vstrd; + if (displacement <= 1020) { + short_form = true; + encoded_disp >>= 2; + } + } + break; + case kSingle: + // Intentional fall-through. + case k32: + // Intentional fall-through. + case kReference: + if (r_src.IsFloat()) { + DCHECK(r_src.IsSingle()); + opcode = kThumb2Vstrs; + if (displacement <= 1020) { + short_form = true; + encoded_disp >>= 2; + } + break; + } + if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) { + short_form = true; + encoded_disp >>= 2; + opcode = kThumbStrSpRel; + } else if (all_low && displacement < 128 && displacement >= 0) { + DCHECK_EQ((displacement & 0x3), 0); + short_form = true; + encoded_disp >>= 2; + opcode = kThumbStrRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2StrRRI12; + } + break; + case kUnsignedHalf: + case kSignedHalf: + if (all_low && displacement < 64 && displacement >= 0) { + DCHECK_EQ((displacement & 0x1), 0); + short_form = true; + encoded_disp >>= 1; + opcode = kThumbStrhRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2StrhRRI12; + } + break; + case kUnsignedByte: + case kSignedByte: + if (all_low && displacement < 32 && displacement >= 0) { + short_form = true; + opcode = kThumbStrbRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2StrbRRI12; + } + break; + default: + LOG(FATAL) << "Bad size: " << size; + } + if (!already_generated) { + if (short_form) { + store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp); + } else { + RegStorage r_scratch = AllocTemp(); + LoadConstant(r_scratch, encoded_disp); + if (r_src.IsFloat()) { + // No index ops - must use a long sequence. Turn the offset into a direct pointer. + OpRegReg(kOpAdd, r_scratch, r_base); + store = StoreBaseDispBody(r_scratch, 0, r_src, size); + } else { + store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size); + } + FreeTemp(r_scratch); + } + } + + // TODO: In future, may need to differentiate Dalvik & spill accesses + if (r_base == rs_rARM_SP) { + AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit()); + } else { + // We might need to generate a safepoint if we have two store instructions (wide or double). 
+ if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) { + MarkSafepointPC(store); + } + } + return store; +} + +LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, + OpSize size) { + // TODO: base this on target. + if (size == kWord) { + size = k32; + } + DCHECK(!((size == k64) || (size == kDouble))); + return StoreBaseDispBody(r_base, displacement, r_src, size); +} + +LIR* Arm64Mir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) { + return StoreBaseDispBody(r_base, displacement, r_src, k64); +} + +LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) { + int opcode; + DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble()); + if (r_dest.IsDouble()) { + opcode = kThumb2Vmovd; + } else { + if (r_dest.IsSingle()) { + opcode = r_src.IsSingle() ? kThumb2Vmovs : kThumb2Fmsr; + } else { + DCHECK(r_src.IsSingle()); + opcode = kThumb2Fmrs; + } + } + LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg()); + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { + res->flags.is_nop = true; + } + return res; +} + +LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) { + LOG(FATAL) << "Unexpected use of OpThreadMem for Arm"; + return NULL; +} + +LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) { + LOG(FATAL) << "Unexpected use of OpMem for Arm"; + return NULL; +} + +LIR* Arm64Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, + int displacement, RegStorage r_src, OpSize size, int s_reg) { + LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm"; + return NULL; +} + +LIR* Arm64Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) { + LOG(FATAL) << "Unexpected use of OpRegMem for Arm"; + return NULL; +} + +LIR* Arm64Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, + int displacement, RegStorage r_dest, OpSize size, int s_reg) { + LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm"; + return NULL; +} + +} // namespace art |
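
The constant-materialization paths above hinge on ModifiedImmediate(): a 32-bit value is "inexpensive" if it is a plain byte (0x000000ab), one of the replicated byte patterns (0x00ab00ab, 0xab00ab00, 0xabababab), or a run of at most eight significant bits that can be rotated into place; everything else goes through MOVW/MOVT or the literal pool. The following is a standalone sketch, not part of the patch: it lifts the same LeadingZeros()/ModifiedImmediate() logic out of the class so it can be compiled on its own, and the sample constants in main() are arbitrary.

#include <cstdint>
#include <cstdio>

namespace {

// Same binary-search count-leading-zeros helper as in the patch above.
int LeadingZeros(uint32_t val) {
  uint32_t alt;
  int32_t n = 32;
  int32_t count = 16;
  do {
    alt = val >> count;
    if (alt != 0) {
      n = n - count;
      val = alt;
    }
    count >>= 1;
  } while (count);
  return n - val;
}

// Same check as Arm64Mir2Lir::ModifiedImmediate(): returns the
// i:imm3:a:bcdefgh form, or -1 if the constant is not encodable.
int ModifiedImmediate(uint32_t value) {
  uint32_t b0 = value & 0xff;
  if (value <= 0xFF)
    return b0;                                    // 0:000:a:bcdefgh
  if (value == ((b0 << 16) | b0))
    return (0x1 << 8) | b0;                       // 0:001:a:bcdefgh
  if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
    return (0x3 << 8) | b0;                       // 0:011:a:bcdefgh
  b0 = (value >> 8) & 0xff;
  if (value == ((b0 << 24) | (b0 << 8)))
    return (0x2 << 8) | b0;                       // 0:010:a:bcdefgh
  // Rotated form: a run of at most eight significant bits, no wrap-around.
  int32_t z_leading = LeadingZeros(value);
  int32_t z_trailing = 32 - LeadingZeros(~value & (value - 1));
  if ((z_leading + z_trailing) < 24)
    return -1;
  value <<= z_leading + 1;   // left-justify, dropping the known-1 msb
  value >>= 25;              // keep bcdefgh
  return value | ((0x8 + z_leading) << 7);
}

}  // namespace

int main() {
  const uint32_t samples[] = {0x000000FFu, 0x00AB00ABu, 0xABABABABu,
                              0x0003FC00u, 0x00012345u};
  for (uint32_t v : samples) {
    int enc = ModifiedImmediate(v);
    if (enc >= 0) {
      std::printf("0x%08X -> modified immediate 0x%03X\n",
                  static_cast<unsigned>(v), static_cast<unsigned>(enc));
    } else {
      std::printf("0x%08X -> needs MOVW/MOVT or a literal load\n",
                  static_cast<unsigned>(v));
    }
  }
  return 0;
}

Of the samples, 0x0003FC00 (0xFF shifted left by 10) is accepted through the rotation path, while 0x00012345 spreads over more than eight significant bits and falls back to the long sequence, which is exactly the decision LoadConstantNoClobber() and OpRegRegImm() make before allocating a scratch register.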
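The "+0.0 = +2.0 - +2.0" trick in LoadFPConstantValue() and LoadConstantWide() exists because the VFP 8-bit float immediate format can express values such as +/-2.0 (imm8 = 0x00 gives +2.0) but has no encoding for 0.0 at all. A minimal sketch of the expansion rule follows, assuming the standard VFPExpandImm layout a : NOT(b) : Replicate(b,5) : cdefgh : Zeros(19); the helper name and the sample immediates are illustrative only, not part of the commit.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Expands an 8-bit VFP float immediate abcdefgh to its single-precision
// bit pattern: sign = a, exponent = NOT(b):b:b:b:b:b:c:d, mantissa = efgh0...0.
static float VfpExpandImm8(uint8_t imm8) {
  uint32_t a = (imm8 >> 7) & 1u;
  uint32_t b = (imm8 >> 6) & 1u;
  uint32_t cdefgh = imm8 & 0x3fu;
  uint32_t bits = (a << 31) | ((b ^ 1u) << 30) |
                  (b ? 0x3E000000u : 0u) |   // Replicate(b, 5) in bits 29..25
                  (cdefgh << 19);
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

int main() {
  std::printf("imm8 0x00 -> %+.1f\n", VfpExpandImm8(0x00));  // +2.0
  std::printf("imm8 0x70 -> %+.1f\n", VfpExpandImm8(0x70));  // +1.0
  std::printf("imm8 0xF8 -> %+.1f\n", VfpExpandImm8(0xF8));  // -1.5
  // No imm8 expands to 0.0, hence vmov #2.0 followed by vsub reg, reg, reg.
  return 0;
}

Since no imm8 maps to zero, loading imm8 = 0 (+2.0) and subtracting the register from itself is the cheapest way to materialize +0.0 without touching the literal pool, which is what the vmovs/vsubs and vmovd/vsubd pairs above do.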