Diffstat (limited to 'compiler/dex/quick')
34 files changed, 22838 insertions, 0 deletions
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h new file mode 100644 index 0000000000..9dd7dafcd6 --- /dev/null +++ b/compiler/dex/quick/arm/arm_lir.h @@ -0,0 +1,499 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_SRC_COMPILER_DEX_QUICK_ARM_ARMLIR_H_ +#define ART_SRC_COMPILER_DEX_QUICK_ARM_ARMLIR_H_ + +#include "dex/compiler_internals.h" + +namespace art { + +/* + * Runtime register usage conventions. + * + * r0-r3: Argument registers in both Dalvik and C/C++ conventions. + * However, for Dalvik->Dalvik calls we'll pass the target's Method* + * pointer in r0 as a hidden arg0. Otherwise used as codegen scratch + * registers. + * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit + * r4 : (rARM_SUSPEND) is reserved (suspend check/debugger assist) + * r5 : Callee save (promotion target) + * r6 : Callee save (promotion target) + * r7 : Callee save (promotion target) + * r8 : Callee save (promotion target) + * r9 : (rARM_SELF) is reserved (pointer to thread-local storage) + * r10 : Callee save (promotion target) + * r11 : Callee save (promotion target) + * r12 : Scratch, may be trashed by linkage stubs + * r13 : (sp) is reserved + * r14 : (lr) is reserved + * r15 : (pc) is reserved + * + * 5 core temps that codegen can use (r0, r1, r2, r3, r12) + * 7 core registers that can be used for promotion + * + * Floating pointer registers + * s0-s31 + * d0-d15, where d0={s0,s1}, d1={s2,s3}, ... , d15={s30,s31} + * + * s16-s31 (d8-d15) preserved across C calls + * s0-s15 (d0-d7) trashed across C calls + * + * s0-s15/d0-d7 used as codegen temp/scratch + * s16-s31/d8-d31 can be used for promotion. + * + * Calling convention + * o On a call to a Dalvik method, pass target's Method* in r0 + * o r1-r3 will be used for up to the first 3 words of arguments + * o Arguments past the first 3 words will be placed in appropriate + * out slots by the caller. + * o If a 64-bit argument would span the register/memory argument + * boundary, it will instead be fully passed in the frame. + * o Maintain a 16-byte stack alignment + * + * Stack frame diagram (stack grows down, higher addresses at top): + * + * +------------------------+ + * | IN[ins-1] | {Note: resides in caller's frame} + * | . | + * | IN[0] | + * | caller's Method* | + * +========================+ {Note: start of callee's frame} + * | spill region | {variable sized - will include lr if non-leaf.} + * +------------------------+ + * | ...filler word... | {Note: used as 2nd word of V[locals-1] if long] + * +------------------------+ + * | V[locals-1] | + * | V[locals-2] | + * | . | + * | . | + * | V[1] | + * | V[0] | + * +------------------------+ + * | 0 to 3 words padding | + * +------------------------+ + * | OUT[outs-1] | + * | OUT[outs-2] | + * | . | + * | OUT[0] | + * | cur_method* | <<== sp w/ 16-byte alignment + * +========================+ + */ + +// Offset to distingish FP regs. 
+#define ARM_FP_REG_OFFSET 32 +// Offset to distinguish DP FP regs. +#define ARM_FP_DOUBLE 64 +// First FP callee save. +#define ARM_FP_CALLEE_SAVE_BASE 16 +// Reg types. +#define ARM_REGTYPE(x) (x & (ARM_FP_REG_OFFSET | ARM_FP_DOUBLE)) +#define ARM_FPREG(x) ((x & ARM_FP_REG_OFFSET) == ARM_FP_REG_OFFSET) +#define ARM_LOWREG(x) ((x & 0x7) == x) +#define ARM_DOUBLEREG(x) ((x & ARM_FP_DOUBLE) == ARM_FP_DOUBLE) +#define ARM_SINGLEREG(x) (ARM_FPREG(x) && !ARM_DOUBLEREG(x)) + +/* + * Note: the low register of a floating point pair is sufficient to + * create the name of a double, but require both names to be passed to + * allow for asserts to verify that the pair is consecutive if significant + * rework is done in this area. Also, it is a good reminder in the calling + * code that reg locations always describe doubles as a pair of singles. + */ +#define ARM_S2D(x,y) ((x) | ARM_FP_DOUBLE) +// Mask to strip off fp flags. +#define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET-1) + +// RegisterLocation templates return values (r0, or r0/r1). +#define ARM_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r0, INVALID_REG,\ + INVALID_SREG, INVALID_SREG} +#define ARM_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, \ + INVALID_SREG, INVALID_SREG} +#define ARM_LOC_C_RETURN_FLOAT ARM_LOC_C_RETURN +#define ARM_LOC_C_RETURN_DOUBLE ARM_LOC_C_RETURN_WIDE + +enum ArmResourceEncodingPos { + kArmGPReg0 = 0, + kArmRegSP = 13, + kArmRegLR = 14, + kArmRegPC = 15, + kArmFPReg0 = 16, + kArmFPReg16 = 32, + kArmRegEnd = 48, +}; + +#define ENCODE_ARM_REG_LIST(N) (static_cast<uint64_t>(N)) +#define ENCODE_ARM_REG_SP (1ULL << kArmRegSP) +#define ENCODE_ARM_REG_LR (1ULL << kArmRegLR) +#define ENCODE_ARM_REG_PC (1ULL << kArmRegPC) +#define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16) + +enum ArmNativeRegisterPool { + r0 = 0, + r1 = 1, + r2 = 2, + r3 = 3, + rARM_SUSPEND = 4, + r5 = 5, + r6 = 6, + r7 = 7, + r8 = 8, + rARM_SELF = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13sp = 13, + rARM_SP = 13, + r14lr = 14, + rARM_LR = 14, + r15pc = 15, + rARM_PC = 15, + fr0 = 0 + ARM_FP_REG_OFFSET, + fr1 = 1 + ARM_FP_REG_OFFSET, + fr2 = 2 + ARM_FP_REG_OFFSET, + fr3 = 3 + ARM_FP_REG_OFFSET, + fr4 = 4 + ARM_FP_REG_OFFSET, + fr5 = 5 + ARM_FP_REG_OFFSET, + fr6 = 6 + ARM_FP_REG_OFFSET, + fr7 = 7 + ARM_FP_REG_OFFSET, + fr8 = 8 + ARM_FP_REG_OFFSET, + fr9 = 9 + ARM_FP_REG_OFFSET, + fr10 = 10 + ARM_FP_REG_OFFSET, + fr11 = 11 + ARM_FP_REG_OFFSET, + fr12 = 12 + ARM_FP_REG_OFFSET, + fr13 = 13 + ARM_FP_REG_OFFSET, + fr14 = 14 + ARM_FP_REG_OFFSET, + fr15 = 15 + ARM_FP_REG_OFFSET, + fr16 = 16 + ARM_FP_REG_OFFSET, + fr17 = 17 + ARM_FP_REG_OFFSET, + fr18 = 18 + ARM_FP_REG_OFFSET, + fr19 = 19 + ARM_FP_REG_OFFSET, + fr20 = 20 + ARM_FP_REG_OFFSET, + fr21 = 21 + ARM_FP_REG_OFFSET, + fr22 = 22 + ARM_FP_REG_OFFSET, + fr23 = 23 + ARM_FP_REG_OFFSET, + fr24 = 24 + ARM_FP_REG_OFFSET, + fr25 = 25 + ARM_FP_REG_OFFSET, + fr26 = 26 + ARM_FP_REG_OFFSET, + fr27 = 27 + ARM_FP_REG_OFFSET, + fr28 = 28 + ARM_FP_REG_OFFSET, + fr29 = 29 + ARM_FP_REG_OFFSET, + fr30 = 30 + ARM_FP_REG_OFFSET, + fr31 = 31 + ARM_FP_REG_OFFSET, + dr0 = fr0 + ARM_FP_DOUBLE, + dr1 = fr2 + ARM_FP_DOUBLE, + dr2 = fr4 + ARM_FP_DOUBLE, + dr3 = fr6 + ARM_FP_DOUBLE, + dr4 = fr8 + ARM_FP_DOUBLE, + dr5 = fr10 + ARM_FP_DOUBLE, + dr6 = fr12 + ARM_FP_DOUBLE, + dr7 = fr14 + ARM_FP_DOUBLE, + dr8 = fr16 + ARM_FP_DOUBLE, + dr9 = fr18 + ARM_FP_DOUBLE, + dr10 = fr20 + ARM_FP_DOUBLE, + dr11 = fr22 + ARM_FP_DOUBLE, + dr12 = fr24 + ARM_FP_DOUBLE, + dr13 = fr26 + ARM_FP_DOUBLE, + dr14 = fr28 
+ ARM_FP_DOUBLE, + dr15 = fr30 + ARM_FP_DOUBLE, +}; + +// Target-independent aliases. +#define rARM_ARG0 r0 +#define rARM_ARG1 r1 +#define rARM_ARG2 r2 +#define rARM_ARG3 r3 +#define rARM_FARG0 r0 +#define rARM_FARG1 r1 +#define rARM_FARG2 r2 +#define rARM_FARG3 r3 +#define rARM_RET0 r0 +#define rARM_RET1 r1 +#define rARM_INVOKE_TGT rARM_LR +#define rARM_COUNT INVALID_REG + +enum ArmShiftEncodings { + kArmLsl = 0x0, + kArmLsr = 0x1, + kArmAsr = 0x2, + kArmRor = 0x3 +}; + +/* + * The following enum defines the list of supported Thumb instructions by the + * assembler. Their corresponding EncodingMap positions will be defined in + * Assemble.cc. + */ +enum ArmOpcode { + kArmFirst = 0, + kArm16BitData = kArmFirst, // DATA [0] rd[15..0]. + kThumbAdcRR, // adc [0100000101] rm[5..3] rd[2..0]. + kThumbAddRRI3, // add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0]*/ + kThumbAddRI8, // add(2) [00110] rd[10..8] imm_8[7..0]. + kThumbAddRRR, // add(3) [0001100] rm[8..6] rn[5..3] rd[2..0]. + kThumbAddRRLH, // add(4) [01000100] H12[01] rm[5..3] rd[2..0]. + kThumbAddRRHL, // add(4) [01001000] H12[10] rm[5..3] rd[2..0]. + kThumbAddRRHH, // add(4) [01001100] H12[11] rm[5..3] rd[2..0]. + kThumbAddPcRel, // add(5) [10100] rd[10..8] imm_8[7..0]. + kThumbAddSpRel, // add(6) [10101] rd[10..8] imm_8[7..0]. + kThumbAddSpI7, // add(7) [101100000] imm_7[6..0]. + kThumbAndRR, // and [0100000000] rm[5..3] rd[2..0]. + kThumbAsrRRI5, // asr(1) [00010] imm_5[10..6] rm[5..3] rd[2..0]. + kThumbAsrRR, // asr(2) [0100000100] rs[5..3] rd[2..0]. + kThumbBCond, // b(1) [1101] cond[11..8] offset_8[7..0]. + kThumbBUncond, // b(2) [11100] offset_11[10..0]. + kThumbBicRR, // bic [0100001110] rm[5..3] rd[2..0]. + kThumbBkpt, // bkpt [10111110] imm_8[7..0]. + kThumbBlx1, // blx(1) [111] H[10] offset_11[10..0]. + kThumbBlx2, // blx(1) [111] H[01] offset_11[10..0]. + kThumbBl1, // blx(1) [111] H[10] offset_11[10..0]. + kThumbBl2, // blx(1) [111] H[11] offset_11[10..0]. + kThumbBlxR, // blx(2) [010001111] rm[6..3] [000]. + kThumbBx, // bx [010001110] H2[6..6] rm[5..3] SBZ[000]. + kThumbCmnRR, // cmn [0100001011] rm[5..3] rd[2..0]. + kThumbCmpRI8, // cmp(1) [00101] rn[10..8] imm_8[7..0]. + kThumbCmpRR, // cmp(2) [0100001010] rm[5..3] rd[2..0]. + kThumbCmpLH, // cmp(3) [01000101] H12[01] rm[5..3] rd[2..0]. + kThumbCmpHL, // cmp(3) [01000110] H12[10] rm[5..3] rd[2..0]. + kThumbCmpHH, // cmp(3) [01000111] H12[11] rm[5..3] rd[2..0]. + kThumbEorRR, // eor [0100000001] rm[5..3] rd[2..0]. + kThumbLdmia, // ldmia [11001] rn[10..8] reglist [7..0]. + kThumbLdrRRI5, // ldr(1) [01101] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbLdrRRR, // ldr(2) [0101100] rm[8..6] rn[5..3] rd[2..0]. + kThumbLdrPcRel, // ldr(3) [01001] rd[10..8] imm_8[7..0]. + kThumbLdrSpRel, // ldr(4) [10011] rd[10..8] imm_8[7..0]. + kThumbLdrbRRI5, // ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbLdrbRRR, // ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0]. + kThumbLdrhRRI5, // ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbLdrhRRR, // ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0]. + kThumbLdrsbRRR, // ldrsb [0101011] rm[8..6] rn[5..3] rd[2..0]. + kThumbLdrshRRR, // ldrsh [0101111] rm[8..6] rn[5..3] rd[2..0]. + kThumbLslRRI5, // lsl(1) [00000] imm_5[10..6] rm[5..3] rd[2..0]. + kThumbLslRR, // lsl(2) [0100000010] rs[5..3] rd[2..0]. + kThumbLsrRRI5, // lsr(1) [00001] imm_5[10..6] rm[5..3] rd[2..0]. + kThumbLsrRR, // lsr(2) [0100000011] rs[5..3] rd[2..0]. + kThumbMovImm, // mov(1) [00100] rd[10..8] imm_8[7..0]. + kThumbMovRR, // mov(2) [0001110000] rn[5..3] rd[2..0]. 
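A worked illustration of how the bracketed bit patterns in these comments read: kThumbAddRRI3 above is add(1), and its encoding-map entry in assemble_arm.cc below gives skeleton 0x1c00 with rd in bits 2..0, rn in bits 5..3 and imm_3 in bits 8..6, so "adds r0, r1, #2" assembles as
  // adds r0, r1, #2: [0001110] imm_3=010 rn=001 rd=000
  0x1c00 | (2u << 6) | (1u << 3) | 0u   // == 0x1c88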
+ kThumbMovRR_H2H, // mov(3) [01000111] H12[11] rm[5..3] rd[2..0]. + kThumbMovRR_H2L, // mov(3) [01000110] H12[01] rm[5..3] rd[2..0]. + kThumbMovRR_L2H, // mov(3) [01000101] H12[10] rm[5..3] rd[2..0]. + kThumbMul, // mul [0100001101] rm[5..3] rd[2..0]. + kThumbMvn, // mvn [0100001111] rm[5..3] rd[2..0]. + kThumbNeg, // neg [0100001001] rm[5..3] rd[2..0]. + kThumbOrr, // orr [0100001100] rm[5..3] rd[2..0]. + kThumbPop, // pop [1011110] r[8..8] rl[7..0]. + kThumbPush, // push [1011010] r[8..8] rl[7..0]. + kThumbRorRR, // ror [0100000111] rs[5..3] rd[2..0]. + kThumbSbc, // sbc [0100000110] rm[5..3] rd[2..0]. + kThumbStmia, // stmia [11000] rn[10..8] reglist [7.. 0]. + kThumbStrRRI5, // str(1) [01100] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbStrRRR, // str(2) [0101000] rm[8..6] rn[5..3] rd[2..0]. + kThumbStrSpRel, // str(3) [10010] rd[10..8] imm_8[7..0]. + kThumbStrbRRI5, // strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbStrbRRR, // strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0]. + kThumbStrhRRI5, // strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0]. + kThumbStrhRRR, // strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0]. + kThumbSubRRI3, // sub(1) [0001111] imm_3[8..6] rn[5..3] rd[2..0]*/ + kThumbSubRI8, // sub(2) [00111] rd[10..8] imm_8[7..0]. + kThumbSubRRR, // sub(3) [0001101] rm[8..6] rn[5..3] rd[2..0]. + kThumbSubSpI7, // sub(4) [101100001] imm_7[6..0]. + kThumbSwi, // swi [11011111] imm_8[7..0]. + kThumbTst, // tst [0100001000] rm[5..3] rn[2..0]. + kThumb2Vldrs, // vldr low sx [111011011001] rn[19..16] rd[15-12] [1010] imm_8[7..0]. + kThumb2Vldrd, // vldr low dx [111011011001] rn[19..16] rd[15-12] [1011] imm_8[7..0]. + kThumb2Vmuls, // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10100000] rm[3..0]. + kThumb2Vmuld, // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10110000] rm[3..0]. + kThumb2Vstrs, // vstr low sx [111011011000] rn[19..16] rd[15-12] [1010] imm_8[7..0]. + kThumb2Vstrd, // vstr low dx [111011011000] rn[19..16] rd[15-12] [1011] imm_8[7..0]. + kThumb2Vsubs, // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100040] rm[3..0]. + kThumb2Vsubd, // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110040] rm[3..0]. + kThumb2Vadds, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100000] rm[3..0]. + kThumb2Vaddd, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0]. + kThumb2Vdivs, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0]. + kThumb2Vdivd, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0]. + kThumb2VcvtIF, // vcvt.F32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0]. + kThumb2VcvtID, // vcvt.F64 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0]. + kThumb2VcvtFI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0]. + kThumb2VcvtDI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0]. + kThumb2VcvtFd, // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0]. + kThumb2VcvtDF, // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0]. + kThumb2Vsqrts, // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0]. + kThumb2Vsqrtd, // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0]. + kThumb2MovImmShift,// mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8. + kThumb2MovImm16, // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8. + kThumb2StrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0]. 
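A note on the VFP load/store opcodes above (kThumb2Vldrs/Vldrd, kThumb2Vstrs/Vstrd): the 8-bit offset field is a word count, which is why the encoding map below prints it with the "!2E" (decimal*4) format key. Illustratively:
  // vldr s0, [r1, #16]  ->  imm_8 field holds 16 / 4 = 4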
+ kThumb2LdrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0]. + kThumb2StrRRI8Predec, // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0]*/ + kThumb2LdrRRI8Predec, // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0]*/ + kThumb2Cbnz, // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0]. + kThumb2Cbz, // cbn rd,<label> [101100] i [1] imm5[7..3] rn[2..0]. + kThumb2AddRRI12, // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2MovRR, // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0]. + kThumb2Vmovs, // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] 101001] M [0] vm[3..0]. + kThumb2Vmovd, // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] 101101] M [0] vm[3..0]. + kThumb2Ldmia, // ldmia [111010001001[ rn[19..16] mask[15..0]. + kThumb2Stmia, // stmia [111010001000[ rn[19..16] mask[15..0]. + kThumb2AddRRR, // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2SubRRR, // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2SbcRRR, // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2CmpRR, // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0]. + kThumb2SubRRI12, // sub rd, rn, #imm12 [11110] i [01010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0]. + kThumb2MvnImm12, // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8. + kThumb2Sel, // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0]. + kThumb2Ubfx, // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0]. + kThumb2Sbfx, // ubfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0]. + kThumb2LdrRRR, // ldr rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrhRRR, // ldrh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrshRRR, // ldrsh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrbRRR, // ldrb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrsbRRR, // ldrsb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2StrRRR, // str rt,[rn,rm,LSL #imm] [111110000100] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2StrhRRR, // str rt,[rn,rm,LSL #imm] [111110000010] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2StrbRRR, // str rt,[rn,rm,LSL #imm] [111110000000] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0]. + kThumb2LdrhRRI12, // ldrh rt,[rn,#imm12] [111110001011] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2LdrshRRI12, // ldrsh rt,[rn,#imm12] [111110011011] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2LdrbRRI12, // ldrb rt,[rn,#imm12] [111110001001] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2LdrsbRRI12, // ldrsb rt,[rn,#imm12] [111110011001] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2StrhRRI12, // strh rt,[rn,#imm12] [111110001010] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2StrbRRI12, // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0]. + kThumb2Pop, // pop [1110100010111101] list[15-0]*/ + kThumb2Push, // push [1110100100101101] list[15-0]*/ + kThumb2CmpRI12, // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0]. + kThumb2AdcRRR, // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. 
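For the bit-field extract opcodes above (kThumb2Ubfx, kThumb2Sbfx), the semantics the operands imply, shown on a concrete case:
  // ubfx r0, r1, #8, #4  ->  r0 = (r1 >> 8) & 0xF   (zero-extended field)
  // sbfx r0, r1, #8, #4  ->  same 4-bit field, sign-extended into r0
  // (the width operand is encoded as width-1; see kFmtBWidth below)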
+ kThumb2AndRRR, // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2BicRRR, // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2CmnRR, // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0]. + kThumb2EorRRR, // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2MulRRR, // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2MnvRR, // mvn [11101010011011110] rd[11-8] [0000] rm[3..0]. + kThumb2RsubRRI8, // rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0]. + kThumb2NegRR, // actually rsub rd, rn, #0. + kThumb2OrrRRR, // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2TstRR, // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0]. + kThumb2LslRRR, // lsl [111110100000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2LsrRRR, // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2AsrRRR, // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2RorRRR, // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2LslRRI5, // lsl [11101010010011110] imm[14.12] rd[11..8] [00] rm[3..0]. + kThumb2LsrRRI5, // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0]. + kThumb2AsrRRI5, // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0]. + kThumb2RorRRI5, // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0]. + kThumb2BicRRI8, // bic [111100000010] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2AndRRI8, // bic [111100000000] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2OrrRRI8, // orr [111100000100] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2EorRRI8, // eor [111100001000] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2AddRRI8, // add [111100001000] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2AdcRRI8, // adc [111100010101] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2SubRRI8, // sub [111100011011] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2SbcRRI8, // sbc [111100010111] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2It, // it [10111111] firstcond[7-4] mask[3-0]. + kThumb2Fmstat, // fmstat [11101110111100011111101000010000]. + kThumb2Vcmpd, // vcmp [111011101] D [11011] rd[15-12] [1011] E [1] M [0] rm[3-0]. + kThumb2Vcmps, // vcmp [111011101] D [11010] rd[15-12] [1011] E [1] M [0] rm[3-0]. + kThumb2LdrPcRel12, // ldr rd,[pc,#imm12] [1111100011011111] rt[15-12] imm12[11-0]. + kThumb2BCond, // b<c> [1110] S cond[25-22] imm6[21-16] [10] J1 [0] J2 imm11[10..0]. + kThumb2Vmovd_RR, // vmov [111011101] D [110000] vd[15-12 [101101] M [0] vm[3-0]. + kThumb2Vmovs_RR, // vmov [111011101] D [110000] vd[15-12 [101001] M [0] vm[3-0]. + kThumb2Fmrs, // vmov [111011100000] vn[19-16] rt[15-12] [1010] N [0010000]. + kThumb2Fmsr, // vmov [111011100001] vn[19-16] rt[15-12] [1010] N [0010000]. + kThumb2Fmrrd, // vmov [111011000100] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0]. + kThumb2Fmdrr, // vmov [111011000101] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0]. + kThumb2Vabsd, // vabs.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0]. + kThumb2Vabss, // vabs.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0]. + kThumb2Vnegd, // vneg.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0]. + kThumb2Vnegs, // vneg.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0]. + kThumb2Vmovs_IMM8, // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0]. + kThumb2Vmovd_IMM8, // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0]. 
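A worked example for kThumb2It above: its encoding-map entry places the first condition in bits 7..4 and the then/else mask in bits 3..0 on top of skeleton 0xbf00, so a single-instruction "it eq" block is
  0xbf00 | (0x0 << 4) | 0x8   // == 0xbf08 (firstcond EQ = 0000, mask = 1000)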
+ kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0]. + kThumb2Umull, // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0]. + kThumb2Ldrex, // ldrex [111010000101] rn[19-16] rt[11-8] [1111] imm8[7-0]. + kThumb2Strex, // strex [111010000100] rn[19-16] rt[11-8] rd[11-8] imm8[7-0]. + kThumb2Clrex, // clrex [111100111011111110000111100101111]. + kThumb2Bfi, // bfi [111100110110] rn[19-16] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0]. + kThumb2Bfc, // bfc [11110011011011110] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0]. + kThumb2Dmb, // dmb [1111001110111111100011110101] option[3-0]. + kThumb2LdrPcReln12,// ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12] imm12[11-0]. + kThumb2Stm, // stm <list> [111010010000] rn[19-16] 000 rl[12-0]. + kThumbUndefined, // undefined [11011110xxxxxxxx]. + kThumb2VPopCS, // vpop <list of callee save fp singles (s16+). + kThumb2VPushCS, // vpush <list callee save fp singles (s16+). + kThumb2Vldms, // vldms rd, <list>. + kThumb2Vstms, // vstms rd, <list>. + kThumb2BUncond, // b <label>. + kThumb2MovImm16H, // similar to kThumb2MovImm16, but target high hw. + kThumb2AddPCR, // Thumb2 2-operand add with hard-coded PC target. + kThumb2Adr, // Special purpose encoding of ADR for switch tables. + kThumb2MovImm16LST,// Special purpose version for switch table use. + kThumb2MovImm16HST,// Special purpose version for switch table use. + kThumb2LdmiaWB, // ldmia [111010011001[ rn[19..16] mask[15..0]. + kThumb2SubsRRI12, // setflags encoding. + kThumb2OrrRRRs, // orrx [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2Push1, // t3 encoding of push. + kThumb2Pop1, // t3 encoding of pop. + kThumb2RsubRRR, // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2Smull, // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0]. + kThumb2LdrdPcRel8, // ldrd rt, rt2, pc +-/1024. + kThumb2LdrdI8, // ldrd rt, rt2, [rn +-/1024]. + kThumb2StrdI8, // strd rt, rt2, [rn +-/1024]. + kArmLast, +}; + +enum ArmOpDmbOptions { + kSY = 0xf, + kST = 0xe, + kISH = 0xb, + kISHST = 0xa, + kNSH = 0x7, + kNSHST = 0x6 +}; + +// Instruction assembly field_loc kind. +enum ArmEncodingKind { + kFmtUnused, + kFmtBitBlt, // Bit string using end/start. + kFmtDfp, // Double FP reg. + kFmtSfp, // Single FP reg. + kFmtModImm, // Shifted 8-bit immed using [26,14..12,7..0]. + kFmtImm16, // Zero-extended immed using [26,19..16,14..12,7..0]. + kFmtImm6, // Encoded branch target using [9,7..3]0. + kFmtImm12, // Zero-extended immediate using [26,14..12,7..0]. + kFmtShift, // Shift descriptor, [14..12,7..4]. + kFmtLsb, // least significant bit using [14..12][7..6]. + kFmtBWidth, // bit-field width, encoded as width-1. + kFmtShift5, // Shift count, [14..12,7..6]. + kFmtBrOffset, // Signed extended [26,11,13,21-16,10-0]:0. + kFmtFPImm, // Encoded floating point immediate. + kFmtOff24, // 24-bit Thumb2 unconditional branch encoding. +}; + +// Struct used to define the snippet positions for each Thumb opcode. +struct ArmEncodingMap { + uint32_t skeleton; + struct { + ArmEncodingKind kind; + int end; // end for kFmtBitBlt, 1-bit slice end for FP regs. + int start; // start for kFmtBitBlt, 4-bit slice end for FP regs. + } field_loc[4]; + ArmOpcode opcode; + uint64_t flags; + const char* name; + const char* fmt; + int size; // Note: size is in bytes. 
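  // Minimal sketch of how one entry is consumed when assembling (illustrative
  // only; `entry` and `operands[]` are hypothetical names, and the real
  // assembler in assemble_arm.cc also handles the non-kFmtBitBlt kinds and
  // the NEEDS_FIXUP cases):
  //   uint32_t bits = entry.skeleton;
  //   for (int i = 0; i < 4; ++i) {
  //     const auto& f = entry.field_loc[i];
  //     if (f.kind == kFmtBitBlt) {   // plain bit slice: bits end..start
  //       uint32_t width = f.end - f.start + 1;
  //       bits |= (operands[i] & ((1u << width) - 1)) << f.start;
  //     }
  //   }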
+}; + +} // namespace art + +#endif // ART_SRC_COMPILER_DEX_QUICK_ARM_ARMLIR_H_ diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc new file mode 100644 index 0000000000..e804215c11 --- /dev/null +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -0,0 +1,1397 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_lir.h" +#include "codegen_arm.h" +#include "dex/quick/mir_to_lir-inl.h" + +namespace art { + +/* + * opcode: ArmOpcode enum + * skeleton: pre-designated bit-pattern for this opcode + * k0: key to applying ds/de + * ds: dest start bit position + * de: dest end bit position + * k1: key to applying s1s/s1e + * s1s: src1 start bit position + * s1e: src1 end bit position + * k2: key to applying s2s/s2e + * s2s: src2 start bit position + * s2e: src2 end bit position + * operands: number of operands (for sanity check purposes) + * name: mnemonic name + * fmt: for pretty-printing + */ +#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \ + k3, k3s, k3e, flags, name, fmt, size) \ + {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \ + {k3, k3s, k3e}}, opcode, flags, name, fmt, size} + +/* Instruction dump string format keys: !pf, where "!" is the start + * of the key, "p" is which numeric operand to use and "f" is the + * print format. + * + * [p]ositions: + * 0 -> operands[0] (dest) + * 1 -> operands[1] (src1) + * 2 -> operands[2] (src2) + * 3 -> operands[3] (extra) + * + * [f]ormats: + * h -> 4-digit hex + * d -> decimal + * E -> decimal*4 + * F -> decimal*2 + * c -> branch condition (beq, bne, etc.) + * t -> pc-relative target + * u -> 1st half of bl[x] target + * v -> 2nd half ob bl[x] target + * R -> register list + * s -> single precision floating point register + * S -> double precision floating point register + * m -> Thumb2 modified immediate + * n -> complimented Thumb2 modified immediate + * M -> Thumb2 16-bit zero-extended immediate + * b -> 4-digit binary + * B -> dmb option string (sy, st, ish, ishst, nsh, hshst) + * H -> operand shift + * C -> core register name + * P -> fp cs register list (base of s16) + * Q -> fp cs register list (base of s0) + * + * [!] escape. To insert "!", use "!!" 
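 *
 * Worked example (kThumbLdrRRI5 below: name "ldr", fmt "!0C, [!1C, #!2E]"):
 * with operands {0, 1, 2} this prints "ldr r0, [r1, #8]" -- !0C and !1C name
 * core registers 0 and 1, and !2E prints operand 2 scaled by 4.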
+ */ +/* NOTE: must be kept in sync with enum ArmOpcode from LIR.h */ +const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { + ENCODING_MAP(kArm16BitData, 0x0000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2), + ENCODING_MAP(kThumbAdcRR, 0x4140, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES, + "adcs", "!0C, !1C", 2), + ENCODING_MAP(kThumbAddRRI3, 0x1c00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "adds", "!0C, !1C, #!2d", 2), + ENCODING_MAP(kThumbAddRI8, 0x3000, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, + "adds", "!0C, !0C, #!1d", 2), + ENCODING_MAP(kThumbAddRRR, 0x1800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, + "adds", "!0C, !1C, !2C", 2), + ENCODING_MAP(kThumbAddRRLH, 0x4440, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, + "add", "!0C, !1C", 2), + ENCODING_MAP(kThumbAddRRHL, 0x4480, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, + "add", "!0C, !1C", 2), + ENCODING_MAP(kThumbAddRRHH, 0x44c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, + "add", "!0C, !1C", 2), + ENCODING_MAP(kThumbAddPcRel, 0xa000, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP, + "add", "!0C, pc, #!1E", 2), + ENCODING_MAP(kThumbAddSpRel, 0xa800, + kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP, + "add", "!0C, sp, #!2E", 2), + ENCODING_MAP(kThumbAddSpI7, 0xb000, + kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, + "add", "sp, #!0d*4", 2), + ENCODING_MAP(kThumbAndRR, 0x4000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "ands", "!0C, !1C", 2), + ENCODING_MAP(kThumbAsrRRI5, 0x1000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "asrs", "!0C, !1C, #!2d", 2), + ENCODING_MAP(kThumbAsrRR, 0x4100, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "asrs", "!0C, !1C", 2), + ENCODING_MAP(kThumbBCond, 0xd000, + kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES | + NEEDS_FIXUP, "b!1c", "!0t", 2), + ENCODING_MAP(kThumbBUncond, 0xe000, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, + "b", "!0t", 2), + ENCODING_MAP(kThumbBicRR, 0x4380, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "bics", "!0C, !1C", 2), + ENCODING_MAP(kThumbBkpt, 0xbe00, + kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, + "bkpt", "!0d", 2), + ENCODING_MAP(kThumbBlx1, 0xf000, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, 
kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | + NEEDS_FIXUP, "blx_1", "!0u", 2), + ENCODING_MAP(kThumbBlx2, 0xe800, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | + NEEDS_FIXUP, "blx_2", "!0v", 2), + ENCODING_MAP(kThumbBl1, 0xf000, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, + "bl_1", "!0u", 2), + ENCODING_MAP(kThumbBl2, 0xf800, + kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, + "bl_2", "!0v", 2), + ENCODING_MAP(kThumbBlxR, 0x4780, + kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR, + "blx", "!0C", 2), + ENCODING_MAP(kThumbBx, 0x4700, + kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, + "bx", "!0C", 2), + ENCODING_MAP(kThumbCmnRR, 0x42c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmn", "!0C, !1C", 2), + ENCODING_MAP(kThumbCmpRI8, 0x2800, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES, + "cmp", "!0C, #!1d", 2), + ENCODING_MAP(kThumbCmpRR, 0x4280, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0C, !1C", 2), + ENCODING_MAP(kThumbCmpLH, 0x4540, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0C, !1C", 2), + ENCODING_MAP(kThumbCmpHL, 0x4580, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0C, !1C", 2), + ENCODING_MAP(kThumbCmpHH, 0x45c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, + "cmp", "!0C, !1C", 2), + ENCODING_MAP(kThumbEorRR, 0x4040, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "eors", "!0C, !1C", 2), + ENCODING_MAP(kThumbLdmia, 0xc800, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, + "ldmia", "!0C!!, <!1R>", 2), + ENCODING_MAP(kThumbLdrRRI5, 0x6800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldr", "!0C, [!1C, #!2E]", 2), + ENCODING_MAP(kThumbLdrRRR, 0x5800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldr", "!0C, [!1C, !2C]", 2), + ENCODING_MAP(kThumbLdrPcRel, 0x4800, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC + | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2), + ENCODING_MAP(kThumbLdrSpRel, 0x9800, + kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP + | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2), + ENCODING_MAP(kThumbLdrbRRI5, 0x7800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrb", "!0C, [!1C, #2d]", 2), + ENCODING_MAP(kThumbLdrbRRR, 0x5c00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 
5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrb", "!0C, [!1C, !2C]", 2), + ENCODING_MAP(kThumbLdrhRRI5, 0x8800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrh", "!0C, [!1C, #!2F]", 2), + ENCODING_MAP(kThumbLdrhRRR, 0x5a00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrh", "!0C, [!1C, !2C]", 2), + ENCODING_MAP(kThumbLdrsbRRR, 0x5600, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsb", "!0C, [!1C, !2C]", 2), + ENCODING_MAP(kThumbLdrshRRR, 0x5e00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsh", "!0C, [!1C, !2C]", 2), + ENCODING_MAP(kThumbLslRRI5, 0x0000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "lsls", "!0C, !1C, #!2d", 2), + ENCODING_MAP(kThumbLslRR, 0x4080, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "lsls", "!0C, !1C", 2), + ENCODING_MAP(kThumbLsrRRI5, 0x0800, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "lsrs", "!0C, !1C, #!2d", 2), + ENCODING_MAP(kThumbLsrRR, 0x40c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "lsrs", "!0C, !1C", 2), + ENCODING_MAP(kThumbMovImm, 0x2000, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0 | SETS_CCODES, + "movs", "!0C, #!1d", 2), + ENCODING_MAP(kThumbMovRR, 0x1c00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "movs", "!0C, !1C", 2), + ENCODING_MAP(kThumbMovRR_H2H, 0x46c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov", "!0C, !1C", 2), + ENCODING_MAP(kThumbMovRR_H2L, 0x4640, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov", "!0C, !1C", 2), + ENCODING_MAP(kThumbMovRR_L2H, 0x4680, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov", "!0C, !1C", 2), + ENCODING_MAP(kThumbMul, 0x4340, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "muls", "!0C, !1C", 2), + ENCODING_MAP(kThumbMvn, 0x43c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "mvns", "!0C, !1C", 2), + ENCODING_MAP(kThumbNeg, 0x4240, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "negs", "!0C, !1C", 2), + ENCODING_MAP(kThumbOrr, 0x4300, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "orrs", "!0C, !1C", 2), + ENCODING_MAP(kThumbPop, 0xbc00, + kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 + | IS_LOAD, "pop", "<!0R>", 2), + ENCODING_MAP(kThumbPush, 0xb400, + 
kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 + | IS_STORE, "push", "<!0R>", 2), + ENCODING_MAP(kThumbRorRR, 0x41c0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, + "rors", "!0C, !1C", 2), + ENCODING_MAP(kThumbSbc, 0x4180, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES, + "sbcs", "!0C, !1C", 2), + ENCODING_MAP(kThumbStmia, 0xc000, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE, + "stmia", "!0C!!, <!1R>", 2), + ENCODING_MAP(kThumbStrRRI5, 0x6000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "str", "!0C, [!1C, #!2E]", 2), + ENCODING_MAP(kThumbStrRRR, 0x5000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, + "str", "!0C, [!1C, !2C]", 2), + ENCODING_MAP(kThumbStrSpRel, 0x9000, + kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP + | IS_STORE, "str", "!0C, [sp, #!2E]", 2), + ENCODING_MAP(kThumbStrbRRI5, 0x7000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "strb", "!0C, [!1C, #!2d]", 2), + ENCODING_MAP(kThumbStrbRRR, 0x5400, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, + "strb", "!0C, [!1C, !2C]", 2), + ENCODING_MAP(kThumbStrhRRI5, 0x8000, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "strh", "!0C, [!1C, #!2F]", 2), + ENCODING_MAP(kThumbStrhRRR, 0x5200, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, + "strh", "!0C, [!1C, !2C]", 2), + ENCODING_MAP(kThumbSubRRI3, 0x1e00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "subs", "!0C, !1C, #!2d", 2), + ENCODING_MAP(kThumbSubRI8, 0x3800, + kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, + "subs", "!0C, #!1d", 2), + ENCODING_MAP(kThumbSubRRR, 0x1a00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, + "subs", "!0C, !1C, !2C", 2), + ENCODING_MAP(kThumbSubSpI7, 0xb080, + kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, + "sub", "sp, #!0d*4", 2), + ENCODING_MAP(kThumbSwi, 0xdf00, + kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, + "swi", "!0d", 2), + ENCODING_MAP(kThumbTst, 0x4200, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES, + "tst", "!0C, !1C", 2), + ENCODING_MAP(kThumb2Vldrs, 0xed900a00, + kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | + REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4), + ENCODING_MAP(kThumb2Vldrd, 0xed900b00, + kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + 
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | + REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4), + ENCODING_MAP(kThumb2Vmuls, 0xee200a00, + kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12, + "vmuls", "!0s, !1s, !2s", 4), + ENCODING_MAP(kThumb2Vmuld, 0xee200b00, + kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vmuld", "!0S, !1S, !2S", 4), + ENCODING_MAP(kThumb2Vstrs, 0xed800a00, + kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "vstr", "!0s, [!1C, #!2E]", 4), + ENCODING_MAP(kThumb2Vstrd, 0xed800b00, + kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "vstr", "!0S, [!1C, #!2E]", 4), + ENCODING_MAP(kThumb2Vsubs, 0xee300a40, + kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vsub", "!0s, !1s, !2s", 4), + ENCODING_MAP(kThumb2Vsubd, 0xee300b40, + kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vsub", "!0S, !1S, !2S", 4), + ENCODING_MAP(kThumb2Vadds, 0xee300a00, + kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vadd", "!0s, !1s, !2s", 4), + ENCODING_MAP(kThumb2Vaddd, 0xee300b00, + kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vadd", "!0S, !1S, !2S", 4), + ENCODING_MAP(kThumb2Vdivs, 0xee800a00, + kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vdivs", "!0s, !1s, !2s", 4), + ENCODING_MAP(kThumb2Vdivd, 0xee800b00, + kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "vdivd", "!0S, !1S, !2S", 4), + ENCODING_MAP(kThumb2VcvtIF, 0xeeb80ac0, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.f32", "!0s, !1s", 4), + ENCODING_MAP(kThumb2VcvtID, 0xeeb80bc0, + kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.f64", "!0S, !1s", 4), + ENCODING_MAP(kThumb2VcvtFI, 0xeebd0ac0, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.s32.f32 ", "!0s, !1s", 4), + ENCODING_MAP(kThumb2VcvtDI, 0xeebd0bc0, + kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.s32.f64 ", "!0s, !1S", 4), + ENCODING_MAP(kThumb2VcvtFd, 0xeeb70ac0, + kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.f64.f32 ", "!0S, !1s", 4), + ENCODING_MAP(kThumb2VcvtDF, 0xeeb70bc0, + kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vcvt.f32.f64 ", "!0s, !1S", 4), + ENCODING_MAP(kThumb2Vsqrts, 0xeeb10ac0, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vsqrt.f32 ", "!0s, !1s", 4), + ENCODING_MAP(kThumb2Vsqrtd, 0xeeb10bc0, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vsqrt.f64 ", "!0S, !1S", 4), + ENCODING_MAP(kThumb2MovImmShift, 0xf04f0000, /* no setflags encoding */ + kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | 
REG_DEF0, + "mov", "!0C, #!1m", 4), + ENCODING_MAP(kThumb2MovImm16, 0xf2400000, + kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "mov", "!0C, #!1M", 4), + ENCODING_MAP(kThumb2StrRRI12, 0xf8c00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "str", "!0C, [!1C, #!2d]", 4), + ENCODING_MAP(kThumb2LdrRRI12, 0xf8d00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldr", "!0C, [!1C, #!2d]", 4), + ENCODING_MAP(kThumb2StrRRI8Predec, 0xf8400c00, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "str", "!0C, [!1C, #-!2d]", 4), + ENCODING_MAP(kThumb2LdrRRI8Predec, 0xf8500c00, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldr", "!0C, [!1C, #-!2d]", 4), + ENCODING_MAP(kThumb2Cbnz, 0xb900, /* Note: does not affect flags */ + kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | + NEEDS_FIXUP, "cbnz", "!0C,!1t", 2), + ENCODING_MAP(kThumb2Cbz, 0xb100, /* Note: does not affect flags */ + kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | + NEEDS_FIXUP, "cbz", "!0C,!1t", 2), + ENCODING_MAP(kThumb2AddRRI12, 0xf2000000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ + "add", "!0C,!1C,#!2d", 4), + ENCODING_MAP(kThumb2MovRR, 0xea4f0000, /* no setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov", "!0C, !1C", 4), + ENCODING_MAP(kThumb2Vmovs, 0xeeb00a40, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vmov.f32 ", " !0s, !1s", 4), + ENCODING_MAP(kThumb2Vmovd, 0xeeb00b40, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vmov.f64 ", " !0S, !1S", 4), + ENCODING_MAP(kThumb2Ldmia, 0xe8900000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, + "ldmia", "!0C!!, <!1R>", 4), + ENCODING_MAP(kThumb2Stmia, 0xe8800000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE, + "stmia", "!0C!!, <!1R>", 4), + ENCODING_MAP(kThumb2AddRRR, 0xeb100000, /* setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "adds", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2SubRRR, 0xebb00000, /* setflags enconding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "subs", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2SbcRRR, 0xeb700000, /* setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES, + "sbcs", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2CmpRR, 0xebb00f00, + kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, + "cmp", 
"!0C, !1C", 4), + ENCODING_MAP(kThumb2SubRRI12, 0xf2a00000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ + "sub", "!0C,!1C,#!2d", 4), + ENCODING_MAP(kThumb2MvnImm12, 0xf06f0000, /* no setflags encoding */ + kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "mvn", "!0C, #!1n", 4), + ENCODING_MAP(kThumb2Sel, 0xfaa0f080, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES, + "sel", "!0C, !1C, !2C", 4), + ENCODING_MAP(kThumb2Ubfx, 0xf3c00000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, + kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, + "ubfx", "!0C, !1C, #!2d, #!3d", 4), + ENCODING_MAP(kThumb2Sbfx, 0xf3400000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, + kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, + "sbfx", "!0C, !1C, #!2d, #!3d", 4), + ENCODING_MAP(kThumb2LdrRRR, 0xf8500000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4), + ENCODING_MAP(kThumb2LdrhRRR, 0xf8300000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4), + ENCODING_MAP(kThumb2LdrshRRR, 0xf9300000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4), + ENCODING_MAP(kThumb2LdrbRRR, 0xf8100000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4), + ENCODING_MAP(kThumb2LdrsbRRR, 0xf9100000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, + "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4), + ENCODING_MAP(kThumb2StrRRR, 0xf8400000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, + "str", "!0C, [!1C, !2C, LSL #!3d]", 4), + ENCODING_MAP(kThumb2StrhRRR, 0xf8200000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, + "strh", "!0C, [!1C, !2C, LSL #!3d]", 4), + ENCODING_MAP(kThumb2StrbRRR, 0xf8000000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, + "strb", "!0C, [!1C, !2C, LSL #!3d]", 4), + ENCODING_MAP(kThumb2LdrhRRI12, 0xf8b00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrh", "!0C, [!1C, #!2d]", 4), + ENCODING_MAP(kThumb2LdrshRRI12, 0xf9b00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrsh", "!0C, [!1C, #!2d]", 4), + ENCODING_MAP(kThumb2LdrbRRI12, 0xf8900000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrb", "!0C, [!1C, #!2d]", 4), + ENCODING_MAP(kThumb2LdrsbRRI12, 0xf9900000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrsb", "!0C, [!1C, #!2d]", 4), + ENCODING_MAP(kThumb2StrhRRI12, 0xf8a00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, 
kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "strh", "!0C, [!1C, #!2d]", 4), + ENCODING_MAP(kThumb2StrbRRI12, 0xf8800000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, + "strb", "!0C, [!1C, #!2d]", 4), + ENCODING_MAP(kThumb2Pop, 0xe8bd0000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 + | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4), + ENCODING_MAP(kThumb2Push, 0xe92d0000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 + | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4), + ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00, + kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_USE0 | SETS_CCODES, + "cmp", "!0C, #!1m", 4), + ENCODING_MAP(kThumb2AdcRRR, 0xeb500000, /* setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "adcs", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2AndRRR, 0xea000000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "and", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2BicRRR, 0xea200000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "bic", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2CmnRR, 0xeb000000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "cmn", "!0C, !1C, shift !2d", 4), + ENCODING_MAP(kThumb2EorRRR, 0xea800000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "eor", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2MulRRR, 0xfb00f000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mul", "!0C, !1C, !2C", 4), + ENCODING_MAP(kThumb2MnvRR, 0xea6f0000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "mvn", "!0C, !1C, shift !2d", 4), + ENCODING_MAP(kThumb2RsubRRI8, 0xf1d00000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "rsb", "!0C,!1C,#!2m", 4), + ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsub */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "neg", "!0C,!1C", 4), + ENCODING_MAP(kThumb2OrrRRR, 0xea400000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, + "orr", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2TstRR, 0xea100f00, + kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, + "tst", "!0C, !1C, shift !2d", 4), + ENCODING_MAP(kThumb2LslRRR, 0xfa00f000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "lsl", "!0C, !1C, !2C", 4), + ENCODING_MAP(kThumb2LsrRRR, 0xfa20f000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "lsr", "!0C, !1C, !2C", 4), + ENCODING_MAP(kThumb2AsrRRR, 0xfa40f000, + kFmtBitBlt, 
11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "asr", "!0C, !1C, !2C", 4), + ENCODING_MAP(kThumb2RorRRR, 0xfa60f000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "ror", "!0C, !1C, !2C", 4), + ENCODING_MAP(kThumb2LslRRI5, 0xea4f0000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "lsl", "!0C, !1C, #!2d", 4), + ENCODING_MAP(kThumb2LsrRRI5, 0xea4f0010, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "lsr", "!0C, !1C, #!2d", 4), + ENCODING_MAP(kThumb2AsrRRI5, 0xea4f0020, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "asr", "!0C, !1C, #!2d", 4), + ENCODING_MAP(kThumb2RorRRI5, 0xea4f0030, + kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "ror", "!0C, !1C, #!2d", 4), + ENCODING_MAP(kThumb2BicRRI8, 0xf0200000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "bic", "!0C, !1C, #!2m", 4), + ENCODING_MAP(kThumb2AndRRI8, 0xf0000000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "and", "!0C, !1C, #!2m", 4), + ENCODING_MAP(kThumb2OrrRRI8, 0xf0400000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "orr", "!0C, !1C, #!2m", 4), + ENCODING_MAP(kThumb2EorRRI8, 0xf0800000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "eor", "!0C, !1C, #!2m", 4), + ENCODING_MAP(kThumb2AddRRI8, 0xf1100000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "adds", "!0C, !1C, #!2m", 4), + ENCODING_MAP(kThumb2AdcRRI8, 0xf1500000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, + "adcs", "!0C, !1C, #!2m", 4), + ENCODING_MAP(kThumb2SubRRI8, 0xf1b00000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "subs", "!0C, !1C, #!2m", 4), + ENCODING_MAP(kThumb2SbcRRI8, 0xf1700000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, + "sbcs", "!0C, !1C, #!2m", 4), + ENCODING_MAP(kThumb2It, 0xbf00, + kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES, + "it:!1b", "!0c", 2), + ENCODING_MAP(kThumb2Fmstat, 0xeef1fa10, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES, + "fmstat", "", 4), + ENCODING_MAP(kThumb2Vcmpd, 0xeeb40b40, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, + "vcmp.f64", "!0S, !1S", 4), + ENCODING_MAP(kThumb2Vcmps, 0xeeb40a40, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, + "vcmp.f32", "!0s, !1s", 4), + ENCODING_MAP(kThumb2LdrPcRel12, 0xf8df0000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | 
NEEDS_FIXUP, + "ldr", "!0C, [r15pc, #!1d]", 4), + ENCODING_MAP(kThumb2BCond, 0xf0008000, + kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP, + "b!1c", "!0t", 4), + ENCODING_MAP(kThumb2Vmovd_RR, 0xeeb00b40, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vmov.f64", "!0S, !1S", 4), + ENCODING_MAP(kThumb2Vmovs_RR, 0xeeb00a40, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vmov.f32", "!0s, !1s", 4), + ENCODING_MAP(kThumb2Fmrs, 0xee100a10, + kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "fmrs", "!0C, !1s", 4), + ENCODING_MAP(kThumb2Fmsr, 0xee000a10, + kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "fmsr", "!0s, !1C", 4), + ENCODING_MAP(kThumb2Fmrrd, 0xec500b10, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2, + "fmrrd", "!0C, !1C, !2S", 4), + ENCODING_MAP(kThumb2Fmdrr, 0xec400b10, + kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "fmdrr", "!0S, !1C, !2C", 4), + ENCODING_MAP(kThumb2Vabsd, 0xeeb00bc0, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vabs.f64", "!0S, !1S", 4), + ENCODING_MAP(kThumb2Vabss, 0xeeb00ac0, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vabs.f32", "!0s, !1s", 4), + ENCODING_MAP(kThumb2Vnegd, 0xeeb10b40, + kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vneg.f64", "!0S, !1S", 4), + ENCODING_MAP(kThumb2Vnegs, 0xeeb10a40, + kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "vneg.f32", "!0s, !1s", 4), + ENCODING_MAP(kThumb2Vmovs_IMM8, 0xeeb00a00, + kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "vmov.f32", "!0s, #0x!1h", 4), + ENCODING_MAP(kThumb2Vmovd_IMM8, 0xeeb00b00, + kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "vmov.f64", "!0S, #0x!1h", 4), + ENCODING_MAP(kThumb2Mla, 0xfb000000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtBitBlt, 15, 12, + IS_QUAD_OP | REG_DEF0 | REG_USE1 | REG_USE2 | REG_USE3, + "mla", "!0C, !1C, !2C, !3C", 4), + ENCODING_MAP(kThumb2Umull, 0xfba00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 3, 0, + IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, + "umull", "!0C, !1C, !2C, !3C", 4), + ENCODING_MAP(kThumb2Ldrex, 0xe8500f00, + kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, + "ldrex", "!0C, [!1C, #!2E]", 4), + ENCODING_MAP(kThumb2Strex, 0xe8400000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, + kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE, + "strex", "!0C,!1C, [!2C, #!2E]", 4), + ENCODING_MAP(kThumb2Clrex, 0xf3bf8f2f, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND, + "clrex", "", 4), + ENCODING_MAP(kThumb2Bfi, 0xf3600000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1, + kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, + "bfi", 
"!0C,!1C,#!2d,#!3d", 4), + ENCODING_MAP(kThumb2Bfc, 0xf36f0000, + kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0, + "bfc", "!0C,#!1d,#!2d", 4), + ENCODING_MAP(kThumb2Dmb, 0xf3bf8f50, + kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP, + "dmb","#!0B",4), + ENCODING_MAP(kThumb2LdrPcReln12, 0xf85f0000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD, + "ldr", "!0C, [r15pc, -#!1d]", 4), + ENCODING_MAP(kThumb2Stm, 0xe9000000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE, + "stm", "!0C, <!1R>", 4), + ENCODING_MAP(kThumbUndefined, 0xde00, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND, + "undefined", "", 2), + // NOTE: vpop, vpush hard-encoded for s16+ reg list + ENCODING_MAP(kThumb2VPopCS, 0xecbd8a00, + kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0 + | IS_LOAD, "vpop", "<!0P>", 4), + ENCODING_MAP(kThumb2VPushCS, 0xed2d8a00, + kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0 + | IS_STORE, "vpush", "<!0P>", 4), + ENCODING_MAP(kThumb2Vldms, 0xec900a00, + kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2 + | IS_LOAD, "vldms", "!0C, <!2Q>", 4), + ENCODING_MAP(kThumb2Vstms, 0xec800a00, + kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2 + | IS_STORE, "vstms", "!0C, <!2Q>", 4), + ENCODING_MAP(kThumb2BUncond, 0xf0009000, + kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, + "b", "!0t", 4), + ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000, + kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0, + "movt", "!0C, #!1M", 4), + ENCODING_MAP(kThumb2AddPCR, 0x4487, + kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_USE0 | IS_BRANCH, + "add", "rPC, !0C", 2), + ENCODING_MAP(kThumb2Adr, 0xf20f0000, + kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + /* Note: doesn't affect flags */ + IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP, + "adr", "!0C,#!1d", 4), + ENCODING_MAP(kThumb2MovImm16LST, 0xf2400000, + kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP, + "mov", "!0C, #!1M", 4), + ENCODING_MAP(kThumb2MovImm16HST, 0xf2c00000, + kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP, + "movt", "!0C, #!1M", 4), + ENCODING_MAP(kThumb2LdmiaWB, 0xe8b00000, + kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, + "ldmia", "!0C!!, <!1R>", 4), + ENCODING_MAP(kThumb2SubsRRI12, 0xf1b00000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, + "subs", "!0C,!1C,#!2d", 4), + ENCODING_MAP(kThumb2OrrRRRs, 0xea500000, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, 
IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "orrs", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2Push1, 0xf84d0d04, + kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0 + | IS_STORE, "push1", "!0C", 4), + ENCODING_MAP(kThumb2Pop1, 0xf85d0b04, + kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0 + | IS_LOAD, "pop1", "!0C", 4), + ENCODING_MAP(kThumb2RsubRRR, 0xebd00000, /* setflags encoding */ + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtShift, -1, -1, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + "rsbs", "!0C, !1C, !2C!3H", 4), + ENCODING_MAP(kThumb2Smull, 0xfb800000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 3, 0, + IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, + "smull", "!0C, !1C, !2C, !3C", 4), + ENCODING_MAP(kThumb2LdrdPcRel8, 0xe9df0000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, + "ldrd", "!0C, !1C, [pc, #!2E]", 4), + ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 7, 0, + IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD, + "ldrd", "!0C, !1C, [!2C, #!3E]", 4), + ENCODING_MAP(kThumb2StrdI8, 0xe9c00000, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtBitBlt, 7, 0, + IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE, + "strd", "!0C, !1C, [!2C, #!3E]", 4), +}; + +/* + * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is + * not ready. Since r5FP is not updated often, it is less likely to + * generate unnecessary stall cycles. + * TUNING: No longer true - find new NOP pattern. + */ +#define PADDING_MOV_R5_R5 0x1C2D + +/* + * Assemble the LIR into binary instruction format. Note that we may + * discover that pc-relative displacements may not fit the selected + * instruction. + */ +AssemblerStatus ArmMir2Lir::AssembleInstructions(uintptr_t start_addr) +{ + LIR* lir; + AssemblerStatus res = kSuccess; // Assume success + + for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + + if (lir->opcode < 0) { + /* 1 means padding is needed */ + if ((lir->opcode == kPseudoPseudoAlign4) && (lir->operands[0] == 1)) { + code_buffer_.push_back(PADDING_MOV_R5_R5 & 0xFF); + code_buffer_.push_back((PADDING_MOV_R5_R5 >> 8) & 0xFF); + } + continue; + } + + if (lir->flags.is_nop) { + continue; + } + + /* + * For PC-relative displacements we won't know if the + * selected instruction will work until late (i.e. - now). + * If something doesn't fit, we must replace the short-form + * operation with a longer-form one. Note, though, that this + * can change code we've already processed, so we'll need to + * re-calculate offsets and restart. To limit the number of + * restarts, the entire list will be scanned and patched. + * Of course, the patching itself may cause new overflows so this + * is an iterative process. + */ + if (lir->flags.pcRelFixup) { + if (lir->opcode == kThumbLdrPcRel || + lir->opcode == kThumb2LdrPcRel12 || + lir->opcode == kThumbAddPcRel || + lir->opcode == kThumb2LdrdPcRel8 || + ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) || + ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) { + /* + * PC-relative loads are mostly used to load immediates + * that are too large to materialize directly in one shot. 
+ * However, if the load displacement exceeds the limit, + * we revert to a multiple-instruction materialization sequence. + */ + LIR *lir_target = lir->target; + uintptr_t pc = (lir->offset + 4) & ~3; + uintptr_t target = lir_target->offset; + int delta = target - pc; + if (delta & 0x3) { + LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; + } + // First, a sanity check for cases we shouldn't see now + if (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) || + ((lir->opcode == kThumbLdrPcRel) && (delta > 1020))) { + // Shouldn't happen in current codegen. + LOG(FATAL) << "Unexpected pc-rel offset " << delta; + } + // Now, check for the difficult cases + if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || + ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) || + ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) || + ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) { + /* + * Note: because rARM_LR may be used to fix up out-of-range + * vldrs/vldrd we include REG_DEF_LR in the resource + * masks for these instructions. + */ + int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || (lir->opcode == kThumb2LdrPcRel12)) + ? lir->operands[0] : rARM_LR; + + // Add new Adr to generate the address. + LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr, + base_reg, 0, 0, 0, 0, lir->target); + InsertLIRBefore(lir, new_adr); + + // Convert to normal load. + if (lir->opcode == kThumb2LdrPcRel12) { + lir->opcode = kThumb2LdrRRI12; + } else if (lir->opcode == kThumb2LdrdPcRel8) { + lir->opcode = kThumb2LdrdI8; + } + // Change the load to be relative to the new Adr base. + if (lir->opcode == kThumb2LdrdI8) { + lir->operands[3] = 0; + lir->operands[2] = base_reg; + } else { + lir->operands[2] = 0; + lir->operands[1] = base_reg; + } + SetupResourceMasks(lir); + res = kRetryAll; + } else { + if ((lir->opcode == kThumb2Vldrs) || + (lir->opcode == kThumb2Vldrd) || + (lir->opcode == kThumb2LdrdPcRel8)) { + lir->operands[2] = delta >> 2; + } else { + lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? delta : + delta >> 2; + } + } + } else if (lir->opcode == kThumb2Cbnz || lir->opcode == kThumb2Cbz) { + LIR *target_lir = lir->target; + uintptr_t pc = lir->offset + 4; + uintptr_t target = target_lir->offset; + int delta = target - pc; + if (delta > 126 || delta < 0) { + /* + * Convert to cmp rx,#0 / b[eq/ne] tgt pair + * Make new branch instruction and insert after + */ + LIR* new_inst = + RawLIR(lir->dalvik_offset, kThumbBCond, 0, + (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe, + 0, 0, 0, lir->target); + InsertLIRAfter(lir, new_inst); + /* Convert the cb[n]z to a cmp rx, #0 ] */ + lir->opcode = kThumbCmpRI8; + /* operand[0] is src1 in both cb[n]z & CmpRI8 */ + lir->operands[1] = 0; + lir->target = 0; + SetupResourceMasks(lir); + res = kRetryAll; + } else { + lir->operands[1] = delta >> 1; + } + } else if (lir->opcode == kThumb2Push || lir->opcode == kThumb2Pop) { + if (__builtin_popcount(lir->operands[0]) == 1) { + /* + * The standard push/pop multiple instruction + * requires at least two registers in the list. + * If we've got just one, switch to the single-reg + * encoding. + */ + lir->opcode = (lir->opcode == kThumb2Push) ? 
kThumb2Push1 : + kThumb2Pop1; + int reg = 0; + while (lir->operands[0]) { + if (lir->operands[0] & 0x1) { + break; + } else { + reg++; + lir->operands[0] >>= 1; + } + } + lir->operands[0] = reg; + SetupResourceMasks(lir); + res = kRetryAll; + } + } else if (lir->opcode == kThumbBCond || lir->opcode == kThumb2BCond) { + LIR *target_lir = lir->target; + int delta = 0; + DCHECK(target_lir); + uintptr_t pc = lir->offset + 4; + uintptr_t target = target_lir->offset; + delta = target - pc; + if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) { + lir->opcode = kThumb2BCond; + SetupResourceMasks(lir); + res = kRetryAll; + } + lir->operands[0] = delta >> 1; + } else if (lir->opcode == kThumb2BUncond) { + LIR *target_lir = lir->target; + uintptr_t pc = lir->offset + 4; + uintptr_t target = target_lir->offset; + int delta = target - pc; + lir->operands[0] = delta >> 1; + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && + lir->operands[0] == 0) { // Useless branch + lir->flags.is_nop = true; + res = kRetryAll; + } + } else if (lir->opcode == kThumbBUncond) { + LIR *target_lir = lir->target; + uintptr_t pc = lir->offset + 4; + uintptr_t target = target_lir->offset; + int delta = target - pc; + if (delta > 2046 || delta < -2048) { + // Convert to Thumb2BCond w/ kArmCondAl + lir->opcode = kThumb2BUncond; + lir->operands[0] = 0; + SetupResourceMasks(lir); + res = kRetryAll; + } else { + lir->operands[0] = delta >> 1; + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && + lir->operands[0] == -1) { // Useless branch + lir->flags.is_nop = true; + res = kRetryAll; + } + } + } else if (lir->opcode == kThumbBlx1) { + DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2); + /* cur_pc is Thumb */ + uintptr_t cur_pc = (start_addr + lir->offset + 4) & ~3; + uintptr_t target = lir->operands[1]; + + /* Match bit[1] in target with base */ + if (cur_pc & 0x2) { + target |= 0x2; + } + int delta = target - cur_pc; + DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); + + lir->operands[0] = (delta >> 12) & 0x7ff; + NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + } else if (lir->opcode == kThumbBl1) { + DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2); + /* Both cur_pc and target are Thumb */ + uintptr_t cur_pc = start_addr + lir->offset + 4; + uintptr_t target = lir->operands[1]; + + int delta = target - cur_pc; + DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); + + lir->operands[0] = (delta >> 12) & 0x7ff; + NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + } else if (lir->opcode == kThumb2Adr) { + SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[2]); + LIR* target = lir->target; + int target_disp = tab_rec ? tab_rec->offset + : target->offset; + int disp = target_disp - ((lir->offset + 4) & ~3); + if (disp < 4096) { + lir->operands[1] = disp; + } else { + // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0] + // TUNING: if this case fires often, it can be improved. Not expected to be common. 
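+        // Note on the expansion below (descriptive comment): the mov/movt pair stashes pointers to this add LIR and to + // tab_rec in its operands so that the kThumb2MovImm16LST/HST fixup cases, handled later in this + // loop, can patch in the low and high 16-bit halves of the displacement relative to the add; the + // Adr itself is then rewritten into an add of rARM_PC so the register ends up holding the + // absolute address of the target.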
+ LIR *new_mov16L = + RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, + lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir), + reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target); + InsertLIRBefore(lir, new_mov16L); + LIR *new_mov16H = + RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, + lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir), + reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target); + InsertLIRBefore(lir, new_mov16H); + if (ARM_LOWREG(lir->operands[0])) { + lir->opcode = kThumbAddRRLH; + } else { + lir->opcode = kThumbAddRRHH; + } + lir->operands[1] = rARM_PC; + SetupResourceMasks(lir); + res = kRetryAll; + } + } else if (lir->opcode == kThumb2MovImm16LST) { + // operands[1] should hold disp, [2] has add, [3] has tab_rec + LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]); + SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + // If tab_rec is null, this is a literal load. Use target + LIR* target = lir->target; + int target_disp = tab_rec ? tab_rec->offset : target->offset; + lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff; + } else if (lir->opcode == kThumb2MovImm16HST) { + // operands[1] should hold disp, [2] has add, [3] has tab_rec + LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]); + SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + // If tab_rec is null, this is a literal load. Use target + LIR* target = lir->target; + int target_disp = tab_rec ? tab_rec->offset : target->offset; + lir->operands[1] = + ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff; + } + } + /* + * If one of the pc-relative instructions expanded we'll have + * to make another pass. Don't bother to fully assemble the + * instruction. + */ + if (res != kSuccess) { + continue; + } + const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; + uint32_t bits = encoder->skeleton; + int i; + for (i = 0; i < 4; i++) { + uint32_t operand; + uint32_t value; + operand = lir->operands[i]; + switch (encoder->field_loc[i].kind) { + case kFmtUnused: + break; + case kFmtFPImm: + value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end; + value |= (operand & 0x0F) << encoder->field_loc[i].start; + bits |= value; + break; + case kFmtBrOffset: + value = ((operand & 0x80000) >> 19) << 26; + value |= ((operand & 0x40000) >> 18) << 11; + value |= ((operand & 0x20000) >> 17) << 13; + value |= ((operand & 0x1f800) >> 11) << 16; + value |= (operand & 0x007ff); + bits |= value; + break; + case kFmtShift5: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtShift: + value = ((operand & 0x70) >> 4) << 12; + value |= (operand & 0x0f) << 4; + bits |= value; + break; + case kFmtBWidth: + value = operand - 1; + bits |= value; + break; + case kFmtLsb: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtImm6: + value = ((operand & 0x20) >> 5) << 9; + value |= (operand & 0x1f) << 3; + bits |= value; + break; + case kFmtBitBlt: + value = (operand << encoder->field_loc[i].start) & + ((1 << (encoder->field_loc[i].end + 1)) - 1); + bits |= value; + break; + case kFmtDfp: { + DCHECK(ARM_DOUBLEREG(operand)); + DCHECK_EQ((operand & 0x1), 0U); + int reg_name = (operand & ARM_FP_REG_MASK) >> 1; + /* Snag the 1-bit slice and position it */ + value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= (reg_name & 0x0f) << encoder->field_loc[i].start; + bits |= value; + 
break; + } + case kFmtSfp: + DCHECK(ARM_SINGLEREG(operand)); + /* Snag the 1-bit slice and position it */ + value = (operand & 0x1) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start; + bits |= value; + break; + case kFmtImm12: + case kFmtModImm: + value = ((operand & 0x800) >> 11) << 26; + value |= ((operand & 0x700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtImm16: + value = ((operand & 0x0800) >> 11) << 26; + value |= ((operand & 0xf000) >> 12) << 16; + value |= ((operand & 0x0700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtOff24: { + uint32_t signbit = (operand >> 31) & 0x1; + uint32_t i1 = (operand >> 22) & 0x1; + uint32_t i2 = (operand >> 21) & 0x1; + uint32_t imm10 = (operand >> 11) & 0x03ff; + uint32_t imm11 = operand & 0x07ff; + uint32_t j1 = (i1 ^ signbit) ? 0 : 1; + uint32_t j2 = (i2 ^ signbit) ? 0 : 1; + value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | + imm11; + bits |= value; + } + break; + default: + LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; + } + } + if (encoder->size == 4) { + code_buffer_.push_back((bits >> 16) & 0xff); + code_buffer_.push_back((bits >> 24) & 0xff); + } + code_buffer_.push_back(bits & 0xff); + code_buffer_.push_back((bits >> 8) & 0xff); + } + return res; +} + +int ArmMir2Lir::GetInsnSize(LIR* lir) +{ + return EncodingMap[lir->opcode].size; +} + +} // namespace art diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc new file mode 100644 index 0000000000..a6720ce6f2 --- /dev/null +++ b/compiler/dex/quick/arm/call_arm.cc @@ -0,0 +1,655 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains codegen for the Thumb2 ISA. */ + +#include "arm_lir.h" +#include "codegen_arm.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "oat/runtime/oat_support_entrypoints.h" + +namespace art { + + +/* Return the position of an ssa name within the argument list */ +int ArmMir2Lir::InPosition(int s_reg) +{ + int v_reg = mir_graph_->SRegToVReg(s_reg); + return v_reg - cu_->num_regs; +} + +/* + * Describe an argument. If it's already in an arg register, just leave it + * there. NOTE: all live arg registers must be locked prior to this call + * to avoid having them allocated as a temp by downstream utilities. + */ +RegLocation ArmMir2Lir::ArgLoc(RegLocation loc) +{ + int arg_num = InPosition(loc.s_reg_low); + if (loc.wide) { + if (arg_num == 2) { + // Bad case - half in register, half in frame. 
Just punt + loc.location = kLocInvalid; + } else if (arg_num < 2) { + loc.low_reg = rARM_ARG1 + arg_num; + loc.high_reg = loc.low_reg + 1; + loc.location = kLocPhysReg; + } else { + loc.location = kLocDalvikFrame; + } + } else { + if (arg_num < 3) { + loc.low_reg = rARM_ARG1 + arg_num; + loc.location = kLocPhysReg; + } else { + loc.location = kLocDalvikFrame; + } + } + return loc; +} + +/* + * Load an argument. If already in a register, just return. If in + * the frame, we can't use the normal LoadValue() because it assumed + * a proper frame - and we're frameless. + */ +RegLocation ArmMir2Lir::LoadArg(RegLocation loc) +{ + if (loc.location == kLocDalvikFrame) { + int start = (InPosition(loc.s_reg_low) + 1) * sizeof(uint32_t); + loc.low_reg = AllocTemp(); + LoadWordDisp(rARM_SP, start, loc.low_reg); + if (loc.wide) { + loc.high_reg = AllocTemp(); + LoadWordDisp(rARM_SP, start + sizeof(uint32_t), loc.high_reg); + } + loc.location = kLocPhysReg; + } + return loc; +} + +/* Lock any referenced arguments that arrive in registers */ +void ArmMir2Lir::LockLiveArgs(MIR* mir) +{ + int first_in = cu_->num_regs; + const int num_arg_regs = 3; // TODO: generalize & move to RegUtil.cc + for (int i = 0; i < mir->ssa_rep->num_uses; i++) { + int v_reg = mir_graph_->SRegToVReg(mir->ssa_rep->uses[i]); + int InPosition = v_reg - first_in; + if (InPosition < num_arg_regs) { + LockTemp(rARM_ARG1 + InPosition); + } + } +} + +/* Find the next MIR, which may be in a following basic block */ +// TODO: should this be a utility in mir_graph? +MIR* ArmMir2Lir::GetNextMir(BasicBlock** p_bb, MIR* mir) +{ + BasicBlock* bb = *p_bb; + MIR* orig_mir = mir; + while (bb != NULL) { + if (mir != NULL) { + mir = mir->next; + } + if (mir != NULL) { + return mir; + } else { + bb = bb->fall_through; + *p_bb = bb; + if (bb) { + mir = bb->first_mir_insn; + if (mir != NULL) { + return mir; + } + } + } + } + return orig_mir; +} + +/* Used for the "verbose" listing */ +//TODO: move to common code +void ArmMir2Lir::GenPrintLabel(MIR* mir) +{ + /* Mark the beginning of a Dalvik instruction for line tracking */ + char* inst_str = cu_->verbose ? 
+ mir_graph_->GetDalvikDisassembly(mir) : NULL; + MarkBoundary(mir->offset, inst_str); +} + +MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir, + OpSize size, bool long_or_double, bool is_object) +{ + int field_offset; + bool is_volatile; + uint32_t field_idx = mir->dalvikInsn.vC; + bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false); + if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) { + return NULL; + } + RegLocation rl_obj = mir_graph_->GetSrc(mir, 0); + LockLiveArgs(mir); + rl_obj = ArmMir2Lir::ArgLoc(rl_obj); + RegLocation rl_dest; + if (long_or_double) { + rl_dest = GetReturnWide(false); + } else { + rl_dest = GetReturn(false); + } + // Point of no return - no aborts after this + ArmMir2Lir::GenPrintLabel(mir); + rl_obj = LoadArg(rl_obj); + GenIGet(field_idx, mir->optimization_flags, size, rl_dest, rl_obj, long_or_double, is_object); + return GetNextMir(bb, mir); +} + +MIR* ArmMir2Lir::SpecialIPut(BasicBlock** bb, MIR* mir, + OpSize size, bool long_or_double, bool is_object) +{ + int field_offset; + bool is_volatile; + uint32_t field_idx = mir->dalvikInsn.vC; + bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false); + if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) { + return NULL; + } + RegLocation rl_src; + RegLocation rl_obj; + LockLiveArgs(mir); + if (long_or_double) { + rl_src = mir_graph_->GetSrcWide(mir, 0); + rl_obj = mir_graph_->GetSrc(mir, 2); + } else { + rl_src = mir_graph_->GetSrc(mir, 0); + rl_obj = mir_graph_->GetSrc(mir, 1); + } + rl_src = ArmMir2Lir::ArgLoc(rl_src); + rl_obj = ArmMir2Lir::ArgLoc(rl_obj); + // Reject if source is split across registers & frame + if (rl_obj.location == kLocInvalid) { + ResetRegPool(); + return NULL; + } + // Point of no return - no aborts after this + ArmMir2Lir::GenPrintLabel(mir); + rl_obj = LoadArg(rl_obj); + rl_src = LoadArg(rl_src); + GenIPut(field_idx, mir->optimization_flags, size, rl_src, rl_obj, long_or_double, is_object); + return GetNextMir(bb, mir); +} + +MIR* ArmMir2Lir::SpecialIdentity(MIR* mir) +{ + RegLocation rl_src; + RegLocation rl_dest; + bool wide = (mir->ssa_rep->num_uses == 2); + if (wide) { + rl_src = mir_graph_->GetSrcWide(mir, 0); + rl_dest = GetReturnWide(false); + } else { + rl_src = mir_graph_->GetSrc(mir, 0); + rl_dest = GetReturn(false); + } + LockLiveArgs(mir); + rl_src = ArmMir2Lir::ArgLoc(rl_src); + if (rl_src.location == kLocInvalid) { + ResetRegPool(); + return NULL; + } + // Point of no return - no aborts after this + ArmMir2Lir::GenPrintLabel(mir); + rl_src = LoadArg(rl_src); + if (wide) { + StoreValueWide(rl_dest, rl_src); + } else { + StoreValue(rl_dest, rl_src); + } + return mir; +} + +/* + * Special-case code generation for simple non-throwing leaf methods.
+ */ +void ArmMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, + SpecialCaseHandler special_case) +{ + current_dalvik_offset_ = mir->offset; + MIR* next_mir = NULL; + switch (special_case) { + case kNullMethod: + DCHECK(mir->dalvikInsn.opcode == Instruction::RETURN_VOID); + next_mir = mir; + break; + case kConstFunction: + ArmMir2Lir::GenPrintLabel(mir); + LoadConstant(rARM_RET0, mir->dalvikInsn.vB); + next_mir = GetNextMir(&bb, mir); + break; + case kIGet: + next_mir = SpecialIGet(&bb, mir, kWord, false, false); + break; + case kIGetBoolean: + case kIGetByte: + next_mir = SpecialIGet(&bb, mir, kUnsignedByte, false, false); + break; + case kIGetObject: + next_mir = SpecialIGet(&bb, mir, kWord, false, true); + break; + case kIGetChar: + next_mir = SpecialIGet(&bb, mir, kUnsignedHalf, false, false); + break; + case kIGetShort: + next_mir = SpecialIGet(&bb, mir, kSignedHalf, false, false); + break; + case kIGetWide: + next_mir = SpecialIGet(&bb, mir, kLong, true, false); + break; + case kIPut: + next_mir = SpecialIPut(&bb, mir, kWord, false, false); + break; + case kIPutBoolean: + case kIPutByte: + next_mir = SpecialIPut(&bb, mir, kUnsignedByte, false, false); + break; + case kIPutObject: + next_mir = SpecialIPut(&bb, mir, kWord, false, true); + break; + case kIPutChar: + next_mir = SpecialIPut(&bb, mir, kUnsignedHalf, false, false); + break; + case kIPutShort: + next_mir = SpecialIPut(&bb, mir, kSignedHalf, false, false); + break; + case kIPutWide: + next_mir = SpecialIPut(&bb, mir, kLong, true, false); + break; + case kIdentity: + next_mir = SpecialIdentity(mir); + break; + default: + return; + } + if (next_mir != NULL) { + current_dalvik_offset_ = next_mir->offset; + if (special_case != kIdentity) { + ArmMir2Lir::GenPrintLabel(next_mir); + } + NewLIR1(kThumbBx, rARM_LR); + core_spill_mask_ = 0; + num_core_spills_ = 0; + fp_spill_mask_ = 0; + num_fp_spills_ = 0; + frame_size_ = 0; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + } +} + +/* + * The sparse table in the literal pool is an array of <key,displacement> + * pairs. For each set, we'll load them as a pair using ldmia. + * This means that the register number of the temp we use for the key + * must be lower than the reg for the displacement. 
+ * + * The test loop will look something like: + * + * adr rBase, <table> + * ldr r_val, [rARM_SP, v_reg_off] + * mov r_idx, #table_size + * lp: + * ldmia rBase!, {r_key, r_disp} + * sub r_idx, #1 + * cmp r_val, r_key + * ifeq + * add rARM_PC, r_disp ; This is the branch from which we compute displacement + * cbnz r_idx, lp + */ +void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) +{ + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpSparseSwitchTable(table); + } + // Add the table to the list - we'll process it later + SwitchTable *tab_rec = + static_cast<SwitchTable*>(arena_->NewMem(sizeof(SwitchTable), true, + ArenaAllocator::kAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + int size = table[1]; + tab_rec->targets = static_cast<LIR**>(arena_->NewMem(size * sizeof(LIR*), true, + ArenaAllocator::kAllocLIR)); + switch_tables_.Insert(tab_rec); + + // Get the switch value + rl_src = LoadValue(rl_src, kCoreReg); + int rBase = AllocTemp(); + /* Allocate key and disp temps */ + int r_key = AllocTemp(); + int r_disp = AllocTemp(); + // Make sure r_key's register number is less than r_disp's number for ldmia + if (r_key > r_disp) { + int tmp = r_disp; + r_disp = r_key; + r_key = tmp; + } + // Materialize a pointer to the switch table + NewLIR3(kThumb2Adr, rBase, 0, reinterpret_cast<uintptr_t>(tab_rec)); + // Set up r_idx + int r_idx = AllocTemp(); + LoadConstant(r_idx, size); + // Establish loop branch target + LIR* target = NewLIR0(kPseudoTargetLabel); + // Load next key/disp + NewLIR2(kThumb2LdmiaWB, rBase, (1 << r_key) | (1 << r_disp)); + OpRegReg(kOpCmp, r_key, rl_src.low_reg); + // Go if match. NOTE: No instruction set switch here - must stay Thumb2 + OpIT(kCondEq, ""); + LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp); + tab_rec->anchor = switch_branch; + // Needs to use setflags encoding here + NewLIR3(kThumb2SubsRRI12, r_idx, r_idx, 1); + OpCondBranch(kCondNe, target); +} + + +void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) +{ + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpPackedSwitchTable(table); + } + // Add the table to the list - we'll process it later + SwitchTable *tab_rec = + static_cast<SwitchTable*>(arena_->NewMem(sizeof(SwitchTable), true, + ArenaAllocator::kAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + int size = table[1]; + tab_rec->targets = + static_cast<LIR**>(arena_->NewMem(size * sizeof(LIR*), true, ArenaAllocator::kAllocLIR)); + switch_tables_.Insert(tab_rec); + + // Get the switch value + rl_src = LoadValue(rl_src, kCoreReg); + int table_base = AllocTemp(); + // Materialize a pointer to the switch table + NewLIR3(kThumb2Adr, table_base, 0, reinterpret_cast<uintptr_t>(tab_rec)); + int low_key = s4FromSwitchData(&table[2]); + int keyReg; + // Remove the bias, if necessary + if (low_key == 0) { + keyReg = rl_src.low_reg; + } else { + keyReg = AllocTemp(); + OpRegRegImm(kOpSub, keyReg, rl_src.low_reg, low_key); + } + // Bounds check - if < 0 or >= size continue following switch + OpRegImm(kOpCmp, keyReg, size-1); + LIR* branch_over = OpCondBranch(kCondHi, NULL); + + // Load the displacement from the switch table + int disp_reg = AllocTemp(); + LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord); + + // ..and go! 
NOTE: No instruction set switch here - must stay Thumb2 + LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg); + tab_rec->anchor = switch_branch; + + /* branch_over target here */ + LIR* target = NewLIR0(kPseudoTargetLabel); + branch_over->target = target; +} + +/* + * Array data table format: + * ushort ident = 0x0300 magic value + * ushort width width of each element in the table + * uint size number of elements in the table + * ubyte data[size*width] table of data values (may contain a single-byte + * padding at the end) + * + * Total size is 4+(width * size + 1)/2 16-bit code units. + */ +void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) +{ + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + // Add the table to the list - we'll process it later + FillArrayData *tab_rec = + static_cast<FillArrayData*>(arena_->NewMem(sizeof(FillArrayData), true, + ArenaAllocator::kAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + uint16_t width = tab_rec->table[1]; + uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16); + tab_rec->size = (size * width) + 8; + + fill_array_data_.Insert(tab_rec); + + // Making a call - use explicit registers + FlushAllRegs(); /* Everything to home location */ + LoadValueDirectFixed(rl_src, r0); + LoadWordDisp(rARM_SELF, ENTRYPOINT_OFFSET(pHandleFillArrayDataFromCode), + rARM_LR); + // Materialize a pointer to the fill data image + NewLIR3(kThumb2Adr, r1, 0, reinterpret_cast<uintptr_t>(tab_rec)); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, rARM_LR); + MarkSafepointPC(call_inst); +} + +/* + * Handle simple case (thin lock) inline. If it's complicated, bail + * out to the heavyweight lock/unlock routines. We'll use dedicated + * registers here in order to be in the right position in case we + * need to bail to oat[Lock/Unlock]Object(self, object) + * + * r0 -> self pointer [arg0 for oat[Lock/Unlock]Object + * r1 -> object [arg1 for oat[Lock/Unlock]Object + * r2 -> initial contents of object->lock, later result of strex + * r3 -> self->thread_id + * r12 -> allowed to be used by utilities as general temp + * + * The result of the strex is 0 if we acquire the lock. + * + * See comments in monitor.cc for the layout of the lock word. + * Of particular interest to this code is the test for the + * simple case - which we handle inline. For monitor enter, the + * simple case is thin lock, held by no-one. For monitor exit, + * the simple case is thin lock, held by the unlocking thread with + * a recursion count of 0. + * + * A minor complication is that there is a field in the lock word + * unrelated to locking: the hash state. This field must be ignored, but + * preserved. + * + */ +void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) +{ + FlushAllRegs(); + DCHECK_EQ(LW_SHAPE_THIN, 0); + LoadValueDirectFixed(rl_src, r0); // Get obj + LockCallTemps(); // Prepare for explicit register usage + GenNullCheck(rl_src.s_reg_low, r0, opt_flags); + LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); + NewLIR3(kThumb2Ldrex, r1, r0, + mirror::Object::MonitorOffset().Int32Value() >> 2); // Get object->lock + // Align owner + OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT); + // Is lock unheld on lock or held by us (==thread_id) on unlock?
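+  // Descriptive note: the bfi below copies the low shape/hash-state bits of the current lock word + // into the candidate word in r2 (the hash state must be preserved), and the bfc clears the + // hash-state bits in r1 so the compare against zero only tests whether the thin lock is unheld.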
+ NewLIR4(kThumb2Bfi, r2, r1, 0, LW_LOCK_OWNER_SHIFT - 1); + NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1); + OpRegImm(kOpCmp, r1, 0); + OpIT(kCondEq, ""); + NewLIR4(kThumb2Strex, r1, r2, r0, + mirror::Object::MonitorOffset().Int32Value() >> 2); + OpRegImm(kOpCmp, r1, 0); + OpIT(kCondNe, "T"); + // Go expensive route - artLockObjectFromCode(self, obj); + LoadWordDisp(rARM_SELF, ENTRYPOINT_OFFSET(pLockObjectFromCode), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, rARM_LR); + MarkSafepointPC(call_inst); + GenMemBarrier(kLoadLoad); +} + +/* + * For monitor unlock, we don't have to use ldrex/strex. Once + * we've determined that the lock is thin and that we own it with + * a zero recursion count, it's safe to punch it back to the + * initial, unlock thin state with a store word. + */ +void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) +{ + DCHECK_EQ(LW_SHAPE_THIN, 0); + FlushAllRegs(); + LoadValueDirectFixed(rl_src, r0); // Get obj + LockCallTemps(); // Prepare for explicit register usage + GenNullCheck(rl_src.s_reg_low, r0, opt_flags); + LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1); // Get lock + LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); + // Is lock unheld on lock or held by us (==thread_id) on unlock? + OpRegRegImm(kOpAnd, r3, r1, + (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)); + // Align owner + OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT); + NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1); + OpRegReg(kOpSub, r1, r2); + OpIT(kCondEq, "EE"); + StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3); + // Go expensive route - UnlockObjectFromCode(obj); + LoadWordDisp(rARM_SELF, ENTRYPOINT_OFFSET(pUnlockObjectFromCode), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, rARM_LR); + MarkSafepointPC(call_inst); + GenMemBarrier(kStoreLoad); +} + +void ArmMir2Lir::GenMoveException(RegLocation rl_dest) +{ + int ex_offset = Thread::ExceptionOffset().Int32Value(); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + int reset_reg = AllocTemp(); + LoadWordDisp(rARM_SELF, ex_offset, rl_result.low_reg); + LoadConstant(reset_reg, 0); + StoreWordDisp(rARM_SELF, ex_offset, reset_reg); + FreeTemp(reset_reg); + StoreValue(rl_dest, rl_result); +} + +/* + * Mark garbage collection card. Skip if the value we're storing is null. + */ +void ArmMir2Lir::MarkGCCard(int val_reg, int tgt_addr_reg) +{ + int reg_card_base = AllocTemp(); + int reg_card_no = AllocTemp(); + LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL); + LoadWordDisp(rARM_SELF, Thread::CardTableOffset().Int32Value(), reg_card_base); + OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift); + StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, + kUnsignedByte); + LIR* target = NewLIR0(kPseudoTargetLabel); + branch_over->target = target; + FreeTemp(reg_card_base); + FreeTemp(reg_card_no); +} + +void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) +{ + int spill_count = num_core_spills_ + num_fp_spills_; + /* + * On entry, r0, r1, r2 & r3 are live. Let the register allocation + * mechanism know so it doesn't try to use any of them when + * expanding the frame or flushing. This leaves the utility + * code with a single temp: r12. This should be enough. 
+ */ + LockTemp(r0); + LockTemp(r1); + LockTemp(r2); + LockTemp(r3); + + /* + * We can safely skip the stack overflow check if we're + * a leaf *and* our frame size < fudge factor. + */ + bool skip_overflow_check = (mir_graph_->MethodIsLeaf() && + (static_cast<size_t>(frame_size_) < + Thread::kStackOverflowReservedBytes)); + NewLIR0(kPseudoMethodEntry); + if (!skip_overflow_check) { + /* Load stack limit */ + LoadWordDisp(rARM_SELF, Thread::StackEndOffset().Int32Value(), r12); + } + /* Spill core callee saves */ + NewLIR1(kThumb2Push, core_spill_mask_); + /* Need to spill any FP regs? */ + if (num_fp_spills_) { + /* + * NOTE: fp spills are a little different from core spills in that + * they are pushed as a contiguous block. When promoting from + * the fp set, we must allocate all singles from s16..highest-promoted + */ + NewLIR1(kThumb2VPushCS, num_fp_spills_); + } + if (!skip_overflow_check) { + OpRegRegImm(kOpSub, rARM_LR, rARM_SP, frame_size_ - (spill_count * 4)); + GenRegRegCheck(kCondCc, rARM_LR, r12, kThrowStackOverflow); + OpRegCopy(rARM_SP, rARM_LR); // Establish stack + } else { + OpRegImm(kOpSub, rARM_SP, frame_size_ - (spill_count * 4)); + } + + FlushIns(ArgLocs, rl_method); + + FreeTemp(r0); + FreeTemp(r1); + FreeTemp(r2); + FreeTemp(r3); +} + +void ArmMir2Lir::GenExitSequence() +{ + int spill_count = num_core_spills_ + num_fp_spills_; + /* + * In the exit path, r0/r1 are live - make sure they aren't + * allocated by the register utilities as temps. + */ + LockTemp(r0); + LockTemp(r1); + + NewLIR0(kPseudoMethodExit); + OpRegImm(kOpAdd, rARM_SP, frame_size_ - (spill_count * 4)); + /* Need to restore any FP callee saves? */ + if (num_fp_spills_) { + NewLIR1(kThumb2VPopCS, num_fp_spills_); + } + if (core_spill_mask_ & (1 << rARM_LR)) { + /* Unspill rARM_LR to rARM_PC */ + core_spill_mask_ &= ~(1 << rARM_LR); + core_spill_mask_ |= (1 << rARM_PC); + } + NewLIR1(kThumb2Pop, core_spill_mask_); + if (!(core_spill_mask_ & (1 << rARM_PC))) { + /* We didn't pop to rARM_PC, so must do a bx rARM_LR */ + NewLIR1(kThumbBx, rARM_LR); + } +} + +} // namespace art diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h new file mode 100644 index 0000000000..a9199dfa7c --- /dev/null +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_SRC_COMPILER_DEX_QUICK_ARM_CODEGENARM_H_ +#define ART_SRC_COMPILER_DEX_QUICK_ARM_CODEGENARM_H_ + +#include "dex/compiler_internals.h" + +namespace art { + +class ArmMir2Lir : public Mir2Lir { + public: + ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); + + // Required for target - codegen helpers.
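+    // (Descriptive note: the declarations in this group are the Thumb2-specific load/store and + // constant-materialization helpers expected of a Mir2Lir target backend.)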
+ bool SmallLiteralDivide(Instruction::Code dalvik_opcode, RegLocation rl_src, + RegLocation rl_dest, int lit); + int LoadHelper(int offset); + LIR* LoadBaseDisp(int rBase, int displacement, int r_dest, OpSize size, int s_reg); + LIR* LoadBaseDispWide(int rBase, int displacement, int r_dest_lo, int r_dest_hi, + int s_reg); + LIR* LoadBaseIndexed(int rBase, int r_index, int r_dest, int scale, OpSize size); + LIR* LoadBaseIndexedDisp(int rBase, int r_index, int scale, int displacement, + int r_dest, int r_dest_hi, OpSize size, int s_reg); + LIR* LoadConstantNoClobber(int r_dest, int value); + LIR* LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value); + LIR* StoreBaseDisp(int rBase, int displacement, int r_src, OpSize size); + LIR* StoreBaseDispWide(int rBase, int displacement, int r_src_lo, int r_src_hi); + LIR* StoreBaseIndexed(int rBase, int r_index, int r_src, int scale, OpSize size); + LIR* StoreBaseIndexedDisp(int rBase, int r_index, int scale, int displacement, + int r_src, int r_src_hi, OpSize size, int s_reg); + void MarkGCCard(int val_reg, int tgt_addr_reg); + + // Required for target - register utilities. + bool IsFpReg(int reg); + bool SameRegType(int reg1, int reg2); + int AllocTypedTemp(bool fp_hint, int reg_class); + int AllocTypedTempPair(bool fp_hint, int reg_class); + int S2d(int low_reg, int high_reg); + int TargetReg(SpecialTargetRegister reg); + RegisterInfo* GetRegInfo(int reg); + RegLocation GetReturnAlt(); + RegLocation GetReturnWideAlt(); + RegLocation LocCReturn(); + RegLocation LocCReturnDouble(); + RegLocation LocCReturnFloat(); + RegLocation LocCReturnWide(); + uint32_t FpRegMask(); + uint64_t GetRegMaskCommon(int reg); + void AdjustSpillMask(); + void ClobberCalleeSave(); + void FlushReg(int reg); + void FlushRegWide(int reg1, int reg2); + void FreeCallTemps(); + void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free); + void LockCallTemps(); + void MarkPreservedSingle(int v_reg, int reg); + void CompilerInitializeRegAlloc(); + + // Required for target - miscellaneous. + AssemblerStatus AssembleInstructions(uintptr_t start_addr); + void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); + void SetupTargetResourceMasks(LIR* lir); + const char* GetTargetInstFmt(int opcode); + const char* GetTargetInstName(int opcode); + std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); + uint64_t GetPCUseDefEncoding(); + uint64_t GetTargetInstFlags(int opcode); + int GetInsnSize(LIR* lir); + bool IsUnconditionalBranch(LIR* lir); + + // Required for target - Dalvik-level generators. 
+ void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src, int scale); + void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_dest, int scale); + void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale); + void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift); + void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); + bool GenInlinedCas32(CallInfo* info, bool need_write_barrier); + bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); + bool GenInlinedSqrt(CallInfo* info); + void GenNegLong(RegLocation rl_dest, RegLocation rl_src); + void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset, + ThrowKind kind); + RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div); + RegLocation GenDivRemLit(RegLocation rl_dest, int reg_lo, int lit, bool is_div); + void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenDivZeroCheck(int reg_lo, int reg_hi); + void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); + void GenExitSequence(); + void GenFillArrayData(uint32_t table_offset, RegLocation rl_src); + void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); + void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); + void GenSelect(BasicBlock* bb, MIR* mir); + void GenMemBarrier(MemBarrierKind barrier_kind); + void GenMonitorEnter(int opt_flags, RegLocation rl_src); + void GenMonitorExit(int opt_flags, RegLocation rl_src); + void GenMoveException(RegLocation rl_dest); + void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, + int first_bit, int second_bit); + void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); + void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); + void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenSpecialCase(BasicBlock* bb, MIR* mir, SpecialCaseHandler special_case); + + // Required for target - single operation generators. 
+ LIR* OpUnconditionalBranch(LIR* target); + LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target); + LIR* OpCmpImmBranch(ConditionCode cond, int reg, int check_value, LIR* target); + LIR* OpCondBranch(ConditionCode cc, LIR* target); + LIR* OpDecAndBranch(ConditionCode c_code, int reg, LIR* target); + LIR* OpFpRegCopy(int r_dest, int r_src); + LIR* OpIT(ConditionCode cond, const char* guide); + LIR* OpMem(OpKind op, int rBase, int disp); + LIR* OpPcRelLoad(int reg, LIR* target); + LIR* OpReg(OpKind op, int r_dest_src); + LIR* OpRegCopy(int r_dest, int r_src); + LIR* OpRegCopyNoInsert(int r_dest, int r_src); + LIR* OpRegImm(OpKind op, int r_dest_src1, int value); + LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset); + LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2); + LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value); + LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2); + LIR* OpTestSuspend(LIR* target); + LIR* OpThreadMem(OpKind op, int thread_offset); + LIR* OpVldm(int rBase, int count); + LIR* OpVstm(int rBase, int count); + void OpLea(int rBase, int reg1, int reg2, int scale, int offset); + void OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, int src_hi); + void OpTlsCmp(int offset, int val); + + RegLocation ArgLoc(RegLocation loc); + LIR* LoadBaseDispBody(int rBase, int displacement, int r_dest, int r_dest_hi, OpSize size, + int s_reg); + LIR* StoreBaseDispBody(int rBase, int displacement, int r_src, int r_src_hi, OpSize size); + void GenPrintLabel(MIR* mir); + LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift); + LIR* OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, int shift); + static const ArmEncodingMap EncodingMap[kArmLast]; + int EncodeShift(int code, int amount); + int ModifiedImmediate(uint32_t value); + ArmConditionCode ArmConditionEncoding(ConditionCode code); + bool InexpensiveConstantInt(int32_t value); + bool InexpensiveConstantFloat(int32_t value); + bool InexpensiveConstantLong(int64_t value); + bool InexpensiveConstantDouble(int64_t value); + + private: + void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, + ConditionCode ccode); + int InPosition(int s_reg); + RegLocation LoadArg(RegLocation loc); + void LockLiveArgs(MIR* mir); + MIR* GetNextMir(BasicBlock** p_bb, MIR* mir); + MIR* SpecialIGet(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object); + MIR* SpecialIPut(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object); + MIR* SpecialIdentity(MIR* mir); + LIR* LoadFPConstantValue(int r_dest, int value); + bool BadOverlap(RegLocation rl_src, RegLocation rl_dest); +}; + +} // namespace art + +#endif // ART_SRC_COMPILER_DEX_QUICK_ARM_CODEGENARM_H_ diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc new file mode 100644 index 0000000000..53a5e1a6dc --- /dev/null +++ b/compiler/dex/quick/arm/fp_arm.cc @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_lir.h" +#include "codegen_arm.h" +#include "dex/quick/mir_to_lir-inl.h" + +namespace art { + +void ArmMir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) +{ + int op = kThumbBkpt; + RegLocation rl_result; + + /* + * Don't attempt to optimize register usage since these opcodes call out to + * the handlers. + */ + switch (opcode) { + case Instruction::ADD_FLOAT_2ADDR: + case Instruction::ADD_FLOAT: + op = kThumb2Vadds; + break; + case Instruction::SUB_FLOAT_2ADDR: + case Instruction::SUB_FLOAT: + op = kThumb2Vsubs; + break; + case Instruction::DIV_FLOAT_2ADDR: + case Instruction::DIV_FLOAT: + op = kThumb2Vdivs; + break; + case Instruction::MUL_FLOAT_2ADDR: + case Instruction::MUL_FLOAT: + op = kThumb2Vmuls; + break; + case Instruction::REM_FLOAT_2ADDR: + case Instruction::REM_FLOAT: + FlushAllRegs(); // Send everything to home location + CallRuntimeHelperRegLocationRegLocation(ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2, false); + rl_result = GetReturn(true); + StoreValue(rl_dest, rl_result); + return; + case Instruction::NEG_FLOAT: + GenNegFloat(rl_dest, rl_src1); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + rl_src1 = LoadValue(rl_src1, kFPReg); + rl_src2 = LoadValue(rl_src2, kFPReg); + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR3(op, rl_result.low_reg, rl_src1.low_reg, rl_src2.low_reg); + StoreValue(rl_dest, rl_result); +} + +void ArmMir2Lir::GenArithOpDouble(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) +{ + int op = kThumbBkpt; + RegLocation rl_result; + + switch (opcode) { + case Instruction::ADD_DOUBLE_2ADDR: + case Instruction::ADD_DOUBLE: + op = kThumb2Vaddd; + break; + case Instruction::SUB_DOUBLE_2ADDR: + case Instruction::SUB_DOUBLE: + op = kThumb2Vsubd; + break; + case Instruction::DIV_DOUBLE_2ADDR: + case Instruction::DIV_DOUBLE: + op = kThumb2Vdivd; + break; + case Instruction::MUL_DOUBLE_2ADDR: + case Instruction::MUL_DOUBLE: + op = kThumb2Vmuld; + break; + case Instruction::REM_DOUBLE_2ADDR: + case Instruction::REM_DOUBLE: + FlushAllRegs(); // Send everything to home location + CallRuntimeHelperRegLocationRegLocation(ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2, false); + rl_result = GetReturnWide(true); + StoreValueWide(rl_dest, rl_result); + return; + case Instruction::NEG_DOUBLE: + GenNegDouble(rl_dest, rl_src1); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + + rl_src1 = LoadValueWide(rl_src1, kFPReg); + DCHECK(rl_src1.wide); + rl_src2 = LoadValueWide(rl_src2, kFPReg); + DCHECK(rl_src2.wide); + rl_result = EvalLoc(rl_dest, kFPReg, true); + DCHECK(rl_dest.wide); + DCHECK(rl_result.wide); + NewLIR3(op, S2d(rl_result.low_reg, rl_result.high_reg), S2d(rl_src1.low_reg, rl_src1.high_reg), + S2d(rl_src2.low_reg, rl_src2.high_reg)); + StoreValueWide(rl_dest, rl_result); +} + +void ArmMir2Lir::GenConversion(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src) +{ + int op = kThumbBkpt; + int src_reg; + RegLocation rl_result; + + switch (opcode) { + case Instruction::INT_TO_FLOAT: + op = kThumb2VcvtIF; + break; + case Instruction::FLOAT_TO_INT: + op = kThumb2VcvtFI; + break; + case Instruction::DOUBLE_TO_FLOAT: + op = kThumb2VcvtDF; + break; + case Instruction::FLOAT_TO_DOUBLE: + op = kThumb2VcvtFd; + break; + case Instruction::INT_TO_DOUBLE: + op = kThumb2VcvtID; + 
break; + case Instruction::DOUBLE_TO_INT: + op = kThumb2VcvtDI; + break; + case Instruction::LONG_TO_DOUBLE: + GenConversionCall(ENTRYPOINT_OFFSET(pL2d), rl_dest, rl_src); + return; + case Instruction::FLOAT_TO_LONG: + GenConversionCall(ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src); + return; + case Instruction::LONG_TO_FLOAT: + GenConversionCall(ENTRYPOINT_OFFSET(pL2f), rl_dest, rl_src); + return; + case Instruction::DOUBLE_TO_LONG: + GenConversionCall(ENTRYPOINT_OFFSET(pD2l), rl_dest, rl_src); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + if (rl_src.wide) { + rl_src = LoadValueWide(rl_src, kFPReg); + src_reg = S2d(rl_src.low_reg, rl_src.high_reg); + } else { + rl_src = LoadValue(rl_src, kFPReg); + src_reg = rl_src.low_reg; + } + if (rl_dest.wide) { + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(op, S2d(rl_result.low_reg, rl_result.high_reg), src_reg); + StoreValueWide(rl_dest, rl_result); + } else { + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(op, rl_result.low_reg, src_reg); + StoreValue(rl_dest, rl_result); + } +} + +void ArmMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, + bool is_double) +{ + LIR* target = &block_label_list_[bb->taken->id]; + RegLocation rl_src1; + RegLocation rl_src2; + if (is_double) { + rl_src1 = mir_graph_->GetSrcWide(mir, 0); + rl_src2 = mir_graph_->GetSrcWide(mir, 2); + rl_src1 = LoadValueWide(rl_src1, kFPReg); + rl_src2 = LoadValueWide(rl_src2, kFPReg); + NewLIR2(kThumb2Vcmpd, S2d(rl_src1.low_reg, rl_src2.high_reg), + S2d(rl_src2.low_reg, rl_src2.high_reg)); + } else { + rl_src1 = mir_graph_->GetSrc(mir, 0); + rl_src2 = mir_graph_->GetSrc(mir, 1); + rl_src1 = LoadValue(rl_src1, kFPReg); + rl_src2 = LoadValue(rl_src2, kFPReg); + NewLIR2(kThumb2Vcmps, rl_src1.low_reg, rl_src2.low_reg); + } + NewLIR0(kThumb2Fmstat); + ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]); + switch(ccode) { + case kCondEq: + case kCondNe: + break; + case kCondLt: + if (gt_bias) { + ccode = kCondMi; + } + break; + case kCondLe: + if (gt_bias) { + ccode = kCondLs; + } + break; + case kCondGt: + if (gt_bias) { + ccode = kCondHi; + } + break; + case kCondGe: + if (gt_bias) { + ccode = kCondCs; + } + break; + default: + LOG(FATAL) << "Unexpected ccode: " << ccode; + } + OpCondBranch(ccode, target); +} + + +void ArmMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) +{ + bool is_double = false; + int default_result = -1; + RegLocation rl_result; + + switch (opcode) { + case Instruction::CMPL_FLOAT: + is_double = false; + default_result = -1; + break; + case Instruction::CMPG_FLOAT: + is_double = false; + default_result = 1; + break; + case Instruction::CMPL_DOUBLE: + is_double = true; + default_result = -1; + break; + case Instruction::CMPG_DOUBLE: + is_double = true; + default_result = 1; + break; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + if (is_double) { + rl_src1 = LoadValueWide(rl_src1, kFPReg); + rl_src2 = LoadValueWide(rl_src2, kFPReg); + // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + LoadConstant(rl_result.low_reg, default_result); + NewLIR2(kThumb2Vcmpd, S2d(rl_src1.low_reg, rl_src2.high_reg), + S2d(rl_src2.low_reg, rl_src2.high_reg)); + } else { + rl_src1 = LoadValue(rl_src1, kFPReg); + rl_src2 = LoadValue(rl_src2, kFPReg); + // In case result vreg is also a srcvreg, break 
association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + LoadConstant(rl_result.low_reg, default_result); + NewLIR2(kThumb2Vcmps, rl_src1.low_reg, rl_src2.low_reg); + } + DCHECK(!ARM_FPREG(rl_result.low_reg)); + NewLIR0(kThumb2Fmstat); + + OpIT((default_result == -1) ? kCondGt : kCondMi, ""); + NewLIR2(kThumb2MovImmShift, rl_result.low_reg, + ModifiedImmediate(-default_result)); // Must not alter ccodes + GenBarrier(); + + OpIT(kCondEq, ""); + LoadConstant(rl_result.low_reg, 0); + GenBarrier(); + + StoreValue(rl_dest, rl_result); +} + +void ArmMir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) +{ + RegLocation rl_result; + rl_src = LoadValue(rl_src, kFPReg); + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(kThumb2Vnegs, rl_result.low_reg, rl_src.low_reg); + StoreValue(rl_dest, rl_result); +} + +void ArmMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) +{ + RegLocation rl_result; + rl_src = LoadValueWide(rl_src, kFPReg); + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(kThumb2Vnegd, S2d(rl_result.low_reg, rl_result.high_reg), + S2d(rl_src.low_reg, rl_src.high_reg)); + StoreValueWide(rl_dest, rl_result); +} + +bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) { + DCHECK_EQ(cu_->instruction_set, kThumb2); + LIR *branch; + RegLocation rl_src = info->args[0]; + RegLocation rl_dest = InlineTargetWide(info); // double place for result + rl_src = LoadValueWide(rl_src, kFPReg); + RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(kThumb2Vsqrtd, S2d(rl_result.low_reg, rl_result.high_reg), + S2d(rl_src.low_reg, rl_src.high_reg)); + NewLIR2(kThumb2Vcmpd, S2d(rl_result.low_reg, rl_result.high_reg), + S2d(rl_result.low_reg, rl_result.high_reg)); + NewLIR0(kThumb2Fmstat); + branch = NewLIR2(kThumbBCond, 0, kArmCondEq); + ClobberCalleeSave(); + LockCallTemps(); // Using fixed registers + int r_tgt = LoadHelper(ENTRYPOINT_OFFSET(pSqrt)); + NewLIR3(kThumb2Fmrrd, r0, r1, S2d(rl_src.low_reg, rl_src.high_reg)); + NewLIR1(kThumbBlxR, r_tgt); + NewLIR3(kThumb2Fmdrr, S2d(rl_result.low_reg, rl_result.high_reg), r0, r1); + branch->target = NewLIR0(kPseudoTargetLabel); + StoreValueWide(rl_dest, rl_result); + return true; +} + + +} // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc new file mode 100644 index 0000000000..feea896e9f --- /dev/null +++ b/compiler/dex/quick/arm/int_arm.cc @@ -0,0 +1,1187 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains codegen for the Thumb2 ISA. 
*/ + +#include "arm_lir.h" +#include "codegen_arm.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "mirror/array.h" +#include "oat/runtime/oat_support_entrypoints.h" + +namespace art { + +LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, + int src2, LIR* target) +{ + OpRegReg(kOpCmp, src1, src2); + return OpCondBranch(cond, target); +} + +/* + * Generate a Thumb2 IT instruction, which can nullify up to + * four subsequent instructions based on a condition and its + * inverse. The condition applies to the first instruction, which + * is executed if the condition is met. The string "guide" consists + * of 0 to 3 chars, and applies to the 2nd through 4th instruction. + * A "T" means the instruction is executed if the condition is + * met, and an "E" means the instruction is executed if the condition + * is not met. + */ +LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) +{ + int mask; + int mask3 = 0; + int mask2 = 0; + int mask1 = 0; + ArmConditionCode code = ArmConditionEncoding(ccode); + int cond_bit = code & 1; + int alt_bit = cond_bit ^ 1; + + //Note: case fallthroughs intentional + switch (strlen(guide)) { + case 3: + mask1 = (guide[2] == 'T') ? cond_bit : alt_bit; + case 2: + mask2 = (guide[1] == 'T') ? cond_bit : alt_bit; + case 1: + mask3 = (guide[0] == 'T') ? cond_bit : alt_bit; + break; + case 0: + break; + default: + LOG(FATAL) << "OAT: bad case in OpIT"; + } + mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) | + (1 << (3 - strlen(guide))); + return NewLIR2(kThumb2It, code, mask); +} + +/* + * 64-bit 3way compare function. + * mov rX, #-1 + * cmp op1hi, op2hi + * blt done + * bgt flip + * sub rX, op1lo, op2lo (treat as unsigned) + * beq done + * ite hi + * mov(hi) rX, #-1 + * mov(!hi) rX, #1 + * flip: + * neg rX + * done: + */ +void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LIR* target1; + LIR* target2; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + int t_reg = AllocTemp(); + LoadConstant(t_reg, -1); + OpRegReg(kOpCmp, rl_src1.high_reg, rl_src2.high_reg); + LIR* branch1 = OpCondBranch(kCondLt, NULL); + LIR* branch2 = OpCondBranch(kCondGt, NULL); + OpRegRegReg(kOpSub, t_reg, rl_src1.low_reg, rl_src2.low_reg); + LIR* branch3 = OpCondBranch(kCondEq, NULL); + + OpIT(kCondHi, "E"); + NewLIR2(kThumb2MovImmShift, t_reg, ModifiedImmediate(-1)); + LoadConstant(t_reg, 1); + GenBarrier(); + + target2 = NewLIR0(kPseudoTargetLabel); + OpRegReg(kOpNeg, t_reg, t_reg); + + target1 = NewLIR0(kPseudoTargetLabel); + + RegLocation rl_temp = LocCReturn(); // Just using as template, will change + rl_temp.low_reg = t_reg; + StoreValue(rl_dest, rl_temp); + FreeTemp(t_reg); + + branch1->target = target1; + branch2->target = target2; + branch3->target = branch1->target; +} + +void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, + int64_t val, ConditionCode ccode) +{ + int32_t val_lo = Low32Bits(val); + int32_t val_hi = High32Bits(val); + DCHECK(ModifiedImmediate(val_lo) >= 0); + DCHECK(ModifiedImmediate(val_hi) >= 0); + LIR* taken = &block_label_list_[bb->taken->id]; + LIR* not_taken = &block_label_list_[bb->fall_through->id]; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + int32_t low_reg = rl_src1.low_reg; + int32_t high_reg = rl_src1.high_reg; + + switch(ccode) { + case kCondEq: + case kCondNe: + LIR* target; + ConditionCode condition; + if (ccode == kCondEq) { + target = not_taken; + condition = kCondEq; + } else { + target = taken; + condition = kCondNe; 
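+        // For NE a single mismatching half is enough to take the branch; for EQ (above),
+        // a mismatching high half exits early to the not-taken block and the low-half
+        // compare at the end of this function decides the remaining cases.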
+ } + if (val == 0) { + int t_reg = AllocTemp(); + NewLIR4(kThumb2OrrRRRs, t_reg, low_reg, high_reg, 0); + FreeTemp(t_reg); + OpCondBranch(condition, taken); + return; + } + OpCmpImmBranch(kCondNe, high_reg, val_hi, target); + break; + case kCondLt: + OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); + OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); + ccode = kCondCc; + break; + case kCondLe: + OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); + OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); + ccode = kCondLs; + break; + case kCondGt: + OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); + OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); + ccode = kCondHi; + break; + case kCondGe: + OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); + OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); + ccode = kCondCs; + break; + default: + LOG(FATAL) << "Unexpected ccode: " << ccode; + } + OpCmpImmBranch(ccode, low_reg, val_lo, taken); +} + +void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) +{ + RegLocation rl_result; + RegLocation rl_src = mir_graph_->GetSrc(mir, 0); + // Temporary debugging code + int dest_sreg = mir->ssa_rep->defs[0]; + if ((dest_sreg < 0) || (dest_sreg >= mir_graph_->GetNumSSARegs())) { + LOG(INFO) << "Bad target sreg: " << dest_sreg << ", in " + << PrettyMethod(cu_->method_idx,*cu_->dex_file); + LOG(INFO) << "at dex offset 0x" << std::hex << mir->offset; + LOG(INFO) << "vreg = " << mir_graph_->SRegToVReg(dest_sreg); + LOG(INFO) << "num uses = " << mir->ssa_rep->num_uses; + if (mir->ssa_rep->num_uses == 1) { + LOG(INFO) << "CONST case, vals = " << mir->dalvikInsn.vB << ", " << mir->dalvikInsn.vC; + } else { + LOG(INFO) << "MOVE case, operands = " << mir->ssa_rep->uses[1] << ", " + << mir->ssa_rep->uses[2]; + } + CHECK(false) << "Invalid target sreg on Select."; + } + // End temporary debugging code + RegLocation rl_dest = mir_graph_->GetDest(mir); + rl_src = LoadValue(rl_src, kCoreReg); + if (mir->ssa_rep->num_uses == 1) { + // CONST case + int true_val = mir->dalvikInsn.vB; + int false_val = mir->dalvikInsn.vC; + rl_result = EvalLoc(rl_dest, kCoreReg, true); + if ((true_val == 1) && (false_val == 0)) { + OpRegRegImm(kOpRsub, rl_result.low_reg, rl_src.low_reg, 1); + OpIT(kCondCc, ""); + LoadConstant(rl_result.low_reg, 0); + GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact + } else if (InexpensiveConstantInt(true_val) && InexpensiveConstantInt(false_val)) { + OpRegImm(kOpCmp, rl_src.low_reg, 0); + OpIT(kCondEq, "E"); + LoadConstant(rl_result.low_reg, true_val); + LoadConstant(rl_result.low_reg, false_val); + GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact + } else { + // Unlikely case - could be tuned. 
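+      // Materialize both constants into temps, then select with a two-slot IT block:
+      //   cmp src, #0 ; ite eq ; mov(eq) result, true_val ; mov(ne) result, false_val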
+ int t_reg1 = AllocTemp(); + int t_reg2 = AllocTemp(); + LoadConstant(t_reg1, true_val); + LoadConstant(t_reg2, false_val); + OpRegImm(kOpCmp, rl_src.low_reg, 0); + OpIT(kCondEq, "E"); + OpRegCopy(rl_result.low_reg, t_reg1); + OpRegCopy(rl_result.low_reg, t_reg2); + GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact + } + } else { + // MOVE case + RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; + RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; + rl_true = LoadValue(rl_true, kCoreReg); + rl_false = LoadValue(rl_false, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegImm(kOpCmp, rl_src.low_reg, 0); + OpIT(kCondEq, "E"); + LIR* l1 = OpRegCopy(rl_result.low_reg, rl_true.low_reg); + l1->flags.is_nop = false; // Make sure this instruction isn't optimized away + LIR* l2 = OpRegCopy(rl_result.low_reg, rl_false.low_reg); + l2->flags.is_nop = false; // Make sure this instruction isn't optimized away + GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact + } + StoreValue(rl_dest, rl_result); +} + +void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) +{ + RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); + RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); + // Normalize such that if either operand is constant, src2 will be constant. + ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]); + if (rl_src1.is_const) { + RegLocation rl_temp = rl_src1; + rl_src1 = rl_src2; + rl_src2 = rl_temp; + ccode = FlipComparisonOrder(ccode); + } + if (rl_src2.is_const) { + RegLocation rl_temp = UpdateLocWide(rl_src2); + // Do special compare/branch against simple const operand if not already in registers. + int64_t val = mir_graph_->ConstantValueWide(rl_src2); + if ((rl_temp.location != kLocPhysReg) && + ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) { + GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); + return; + } + } + LIR* taken = &block_label_list_[bb->taken->id]; + LIR* not_taken = &block_label_list_[bb->fall_through->id]; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + OpRegReg(kOpCmp, rl_src1.high_reg, rl_src2.high_reg); + switch(ccode) { + case kCondEq: + OpCondBranch(kCondNe, not_taken); + break; + case kCondNe: + OpCondBranch(kCondNe, taken); + break; + case kCondLt: + OpCondBranch(kCondLt, taken); + OpCondBranch(kCondGt, not_taken); + ccode = kCondCc; + break; + case kCondLe: + OpCondBranch(kCondLt, taken); + OpCondBranch(kCondGt, not_taken); + ccode = kCondLs; + break; + case kCondGt: + OpCondBranch(kCondGt, taken); + OpCondBranch(kCondLt, not_taken); + ccode = kCondHi; + break; + case kCondGe: + OpCondBranch(kCondGt, taken); + OpCondBranch(kCondLt, not_taken); + ccode = kCondCs; + break; + default: + LOG(FATAL) << "Unexpected ccode: " << ccode; + } + OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg); + OpCondBranch(ccode, taken); +} + +/* + * Generate a register comparison to an immediate and branch. Caller + * is responsible for setting branch target field. + */ +LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, int check_value, + LIR* target) +{ + LIR* branch; + int mod_imm; + ArmConditionCode arm_cond = ArmConditionEncoding(cond); + if ((ARM_LOWREG(reg)) && (check_value == 0) && + ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) { + branch = NewLIR2((arm_cond == kArmCondEq) ? 
kThumb2Cbz : kThumb2Cbnz, + reg, 0); + } else { + mod_imm = ModifiedImmediate(check_value); + if (ARM_LOWREG(reg) && ((check_value & 0xff) == check_value)) { + NewLIR2(kThumbCmpRI8, reg, check_value); + } else if (mod_imm >= 0) { + NewLIR2(kThumb2CmpRI12, reg, mod_imm); + } else { + int t_reg = AllocTemp(); + LoadConstant(t_reg, check_value); + OpRegReg(kOpCmp, reg, t_reg); + } + branch = NewLIR2(kThumbBCond, 0, arm_cond); + } + branch->target = target; + return branch; +} + +LIR* ArmMir2Lir::OpRegCopyNoInsert(int r_dest, int r_src) +{ + LIR* res; + int opcode; + if (ARM_FPREG(r_dest) || ARM_FPREG(r_src)) + return OpFpRegCopy(r_dest, r_src); + if (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src)) + opcode = kThumbMovRR; + else if (!ARM_LOWREG(r_dest) && !ARM_LOWREG(r_src)) + opcode = kThumbMovRR_H2H; + else if (ARM_LOWREG(r_dest)) + opcode = kThumbMovRR_H2L; + else + opcode = kThumbMovRR_L2H; + res = RawLIR(current_dalvik_offset_, opcode, r_dest, r_src); + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { + res->flags.is_nop = true; + } + return res; +} + +LIR* ArmMir2Lir::OpRegCopy(int r_dest, int r_src) +{ + LIR* res = OpRegCopyNoInsert(r_dest, r_src); + AppendLIR(res); + return res; +} + +void ArmMir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, + int src_hi) +{ + bool dest_fp = ARM_FPREG(dest_lo) && ARM_FPREG(dest_hi); + bool src_fp = ARM_FPREG(src_lo) && ARM_FPREG(src_hi); + DCHECK_EQ(ARM_FPREG(src_lo), ARM_FPREG(src_hi)); + DCHECK_EQ(ARM_FPREG(dest_lo), ARM_FPREG(dest_hi)); + if (dest_fp) { + if (src_fp) { + OpRegCopy(S2d(dest_lo, dest_hi), S2d(src_lo, src_hi)); + } else { + NewLIR3(kThumb2Fmdrr, S2d(dest_lo, dest_hi), src_lo, src_hi); + } + } else { + if (src_fp) { + NewLIR3(kThumb2Fmrrd, dest_lo, dest_hi, S2d(src_lo, src_hi)); + } else { + // Handle overlap + if (src_hi == dest_lo) { + OpRegCopy(dest_hi, src_hi); + OpRegCopy(dest_lo, src_lo); + } else { + OpRegCopy(dest_lo, src_lo); + OpRegCopy(dest_hi, src_hi); + } + } + } +} + +// Table of magic divisors +struct MagicTable { + uint32_t magic; + uint32_t shift; + DividePattern pattern; +}; + +static const MagicTable magic_table[] = { + {0, 0, DivideNone}, // 0 + {0, 0, DivideNone}, // 1 + {0, 0, DivideNone}, // 2 + {0x55555556, 0, Divide3}, // 3 + {0, 0, DivideNone}, // 4 + {0x66666667, 1, Divide5}, // 5 + {0x2AAAAAAB, 0, Divide3}, // 6 + {0x92492493, 2, Divide7}, // 7 + {0, 0, DivideNone}, // 8 + {0x38E38E39, 1, Divide5}, // 9 + {0x66666667, 2, Divide5}, // 10 + {0x2E8BA2E9, 1, Divide5}, // 11 + {0x2AAAAAAB, 1, Divide5}, // 12 + {0x4EC4EC4F, 2, Divide5}, // 13 + {0x92492493, 3, Divide7}, // 14 + {0x88888889, 3, Divide7}, // 15 +}; + +// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4) +bool ArmMir2Lir::SmallLiteralDivide(Instruction::Code dalvik_opcode, + RegLocation rl_src, RegLocation rl_dest, int lit) +{ + if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) { + return false; + } + DividePattern pattern = magic_table[lit].pattern; + if (pattern == DivideNone) { + return false; + } + // Tuning: add rem patterns + if (dalvik_opcode != Instruction::DIV_INT_LIT8) { + return false; + } + + int r_magic = AllocTemp(); + LoadConstant(r_magic, magic_table[lit].magic); + rl_src = LoadValue(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + int r_hi = AllocTemp(); + int r_lo = AllocTemp(); + NewLIR4(kThumb2Smull, r_lo, r_hi, r_magic, rl_src.low_reg); + switch(pattern) { + case Divide3: + OpRegRegRegShift(kOpSub, 
rl_result.low_reg, r_hi, + rl_src.low_reg, EncodeShift(kArmAsr, 31)); + break; + case Divide5: + OpRegRegImm(kOpAsr, r_lo, rl_src.low_reg, 31); + OpRegRegRegShift(kOpRsub, rl_result.low_reg, r_lo, r_hi, + EncodeShift(kArmAsr, magic_table[lit].shift)); + break; + case Divide7: + OpRegReg(kOpAdd, r_hi, rl_src.low_reg); + OpRegRegImm(kOpAsr, r_lo, rl_src.low_reg, 31); + OpRegRegRegShift(kOpRsub, rl_result.low_reg, r_lo, r_hi, + EncodeShift(kArmAsr, magic_table[lit].shift)); + break; + default: + LOG(FATAL) << "Unexpected pattern: " << pattern; + } + StoreValue(rl_dest, rl_result); + return true; +} + +LIR* ArmMir2Lir::GenRegMemCheck(ConditionCode c_code, + int reg1, int base, int offset, ThrowKind kind) +{ + LOG(FATAL) << "Unexpected use of GenRegMemCheck for Arm"; + return NULL; +} + +RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit, + bool is_div) +{ + LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm"; + return rl_dest; +} + +RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, int reg1, int reg2, + bool is_div) +{ + LOG(FATAL) << "Unexpected use of GenDivRem for Arm"; + return rl_dest; +} + +bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) +{ + DCHECK_EQ(cu_->instruction_set, kThumb2); + RegLocation rl_src1 = info->args[0]; + RegLocation rl_src2 = info->args[1]; + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src2 = LoadValue(rl_src2, kCoreReg); + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg); + OpIT((is_min) ? kCondGt : kCondLt, "E"); + OpRegReg(kOpMov, rl_result.low_reg, rl_src2.low_reg); + OpRegReg(kOpMov, rl_result.low_reg, rl_src1.low_reg); + GenBarrier(); + StoreValue(rl_dest, rl_result); + return true; +} + +void ArmMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) +{ + LOG(FATAL) << "Unexpected use of OpLea for Arm"; +} + +void ArmMir2Lir::OpTlsCmp(int offset, int val) +{ + LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm"; +} + +bool ArmMir2Lir::GenInlinedCas32(CallInfo* info, bool need_write_barrier) { + DCHECK_EQ(cu_->instruction_set, kThumb2); + // Unused - RegLocation rl_src_unsafe = info->args[0]; + RegLocation rl_src_obj= info->args[1]; // Object - known non-null + RegLocation rl_src_offset= info->args[2]; // long low + rl_src_offset.wide = 0; // ignore high half in info->args[3] + RegLocation rl_src_expected= info->args[4]; // int or Object + RegLocation rl_src_new_value= info->args[5]; // int or Object + RegLocation rl_dest = InlineTarget(info); // boolean place for result + + + // Release store semantics, get the barrier out of the way. TODO: revisit + GenMemBarrier(kStoreLoad); + + RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + + if (need_write_barrier && !mir_graph_->IsConstantNullRef(rl_new_value)) { + // Mark card for object assuming new value is stored. + MarkGCCard(rl_new_value.low_reg, rl_object.low_reg); + } + + RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); + + int r_ptr = AllocTemp(); + OpRegRegReg(kOpAdd, r_ptr, rl_object.low_reg, rl_offset.low_reg); + + // Free now unneeded rl_object and rl_offset to give more temps. 
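+  // The exclusive-access sequence below: ldrex the current value, compare it with the
+  // expected value, then a three-slot IT block attempts the strex and converts the strex
+  // status into the boolean result (1 on success, 0 on strex failure or value mismatch).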
+ ClobberSReg(rl_object.s_reg_low); + FreeTemp(rl_object.low_reg); + ClobberSReg(rl_offset.s_reg_low); + FreeTemp(rl_offset.low_reg); + + int r_old_value = AllocTemp(); + NewLIR3(kThumb2Ldrex, r_old_value, r_ptr, 0); // r_old_value := [r_ptr] + + RegLocation rl_expected = LoadValue(rl_src_expected, kCoreReg); + + // if (r_old_value == rExpected) { + // [r_ptr] <- r_new_value && r_result := success ? 0 : 1 + // r_result ^= 1 + // } else { + // r_result := 0 + // } + OpRegReg(kOpCmp, r_old_value, rl_expected.low_reg); + FreeTemp(r_old_value); // Now unneeded. + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpIT(kCondEq, "TE"); + NewLIR4(kThumb2Strex, rl_result.low_reg, rl_new_value.low_reg, r_ptr, 0); + FreeTemp(r_ptr); // Now unneeded. + OpRegImm(kOpXor, rl_result.low_reg, 1); + OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg); + + StoreValue(rl_dest, rl_result); + + return true; +} + +LIR* ArmMir2Lir::OpPcRelLoad(int reg, LIR* target) +{ + return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg, 0, 0, 0, 0, target); +} + +LIR* ArmMir2Lir::OpVldm(int rBase, int count) +{ + return NewLIR3(kThumb2Vldms, rBase, fr0, count); +} + +LIR* ArmMir2Lir::OpVstm(int rBase, int count) +{ + return NewLIR3(kThumb2Vstms, rBase, fr0, count); +} + +void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, + RegLocation rl_result, int lit, + int first_bit, int second_bit) +{ + OpRegRegRegShift(kOpAdd, rl_result.low_reg, rl_src.low_reg, rl_src.low_reg, + EncodeShift(kArmLsl, second_bit - first_bit)); + if (first_bit != 0) { + OpRegRegImm(kOpLsl, rl_result.low_reg, rl_result.low_reg, first_bit); + } +} + +void ArmMir2Lir::GenDivZeroCheck(int reg_lo, int reg_hi) +{ + int t_reg = AllocTemp(); + NewLIR4(kThumb2OrrRRRs, t_reg, reg_lo, reg_hi, 0); + FreeTemp(t_reg); + GenCheck(kCondEq, kThrowDivZero); +} + +// Test suspend flag, return target of taken suspend branch +LIR* ArmMir2Lir::OpTestSuspend(LIR* target) +{ + NewLIR2(kThumbSubRI8, rARM_SUSPEND, 1); + return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target); +} + +// Decrement register and branch on condition +LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) +{ + // Combine sub & test using sub setflags encoding here + NewLIR3(kThumb2SubsRRI12, reg, reg, 1); + return OpCondBranch(c_code, target); +} + +void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) +{ +#if ANDROID_SMP != 0 + int dmb_flavor; + // TODO: revisit Arm barrier kinds + switch (barrier_kind) { + case kLoadStore: dmb_flavor = kSY; break; + case kLoadLoad: dmb_flavor = kSY; break; + case kStoreStore: dmb_flavor = kST; break; + case kStoreLoad: dmb_flavor = kSY; break; + default: + LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind; + dmb_flavor = kSY; // quiet gcc. 
+ break; + } + LIR* dmb = NewLIR1(kThumb2Dmb, dmb_flavor); + dmb->def_mask = ENCODE_ALL; +#endif +} + +void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) +{ + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + int z_reg = AllocTemp(); + LoadConstantNoClobber(z_reg, 0); + // Check for destructive overlap + if (rl_result.low_reg == rl_src.high_reg) { + int t_reg = AllocTemp(); + OpRegRegReg(kOpSub, rl_result.low_reg, z_reg, rl_src.low_reg); + OpRegRegReg(kOpSbc, rl_result.high_reg, z_reg, t_reg); + FreeTemp(t_reg); + } else { + OpRegRegReg(kOpSub, rl_result.low_reg, z_reg, rl_src.low_reg); + OpRegRegReg(kOpSbc, rl_result.high_reg, z_reg, rl_src.high_reg); + } + FreeTemp(z_reg); + StoreValueWide(rl_dest, rl_result); +} + + + /* + * Check to see if a result pair has a misaligned overlap with an operand pair. This + * is not usual for dx to generate, but it is legal (for now). In a future rev of + * dex, we'll want to make this case illegal. + */ +bool ArmMir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) +{ + DCHECK(rl_src.wide); + DCHECK(rl_dest.wide); + return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1); +} + +void ArmMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + /* + * To pull off inline multiply, we have a worst-case requirement of 8 temporary + * registers. Normally for Arm, we get 5. We can get to 6 by including + * lr in the temp set. The only problematic case is all operands and result are + * distinct, and none have been promoted. In that case, we can succeed by aggressively + * freeing operand temp registers after they are no longer needed. All other cases + * can proceed normally. We'll just punt on the case of the result having a misaligned + * overlap with either operand and send that case to a runtime handler. + */ + RegLocation rl_result; + if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) { + int func_offset = ENTRYPOINT_OFFSET(pLmul); + FlushAllRegs(); + CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false); + rl_result = GetReturnWide(false); + StoreValueWide(rl_dest, rl_result); + return; + } + // Temporarily add LR to the temp pool, and assign it to tmp1 + MarkTemp(rARM_LR); + FreeTemp(rARM_LR); + int tmp1 = rARM_LR; + LockTemp(rARM_LR); + + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + + bool special_case = true; + // If operands are the same, or any pair has been promoted we're not the special case. + if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || + (!IsTemp(rl_src1.low_reg) && !IsTemp(rl_src1.high_reg)) || + (!IsTemp(rl_src2.low_reg) && !IsTemp(rl_src2.high_reg))) { + special_case = false; + } + // Tuning: if rl_dest has been promoted and is *not* either operand, could use directly. 
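+  // Standard 64x64->64 decomposition: umull produces the full 64-bit product of the low
+  // words, then lo1*hi2 + hi1*lo2 is folded into the high word (hi1*hi2 only affects
+  // bits >= 64 and is dropped).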
+ int res_lo = AllocTemp(); + int res_hi; + if (rl_src1.low_reg == rl_src2.low_reg) { + res_hi = AllocTemp(); + NewLIR3(kThumb2MulRRR, tmp1, rl_src1.low_reg, rl_src1.high_reg); + NewLIR4(kThumb2Umull, res_lo, res_hi, rl_src1.low_reg, rl_src1.low_reg); + OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1)); + } else { + // In the special case, all temps are now allocated + NewLIR3(kThumb2MulRRR, tmp1, rl_src2.low_reg, rl_src1.high_reg); + if (special_case) { + DCHECK_NE(rl_src1.low_reg, rl_src2.low_reg); + DCHECK_NE(rl_src1.high_reg, rl_src2.high_reg); + FreeTemp(rl_src1.high_reg); + } + res_hi = AllocTemp(); + + NewLIR4(kThumb2Umull, res_lo, res_hi, rl_src2.low_reg, rl_src1.low_reg); + NewLIR4(kThumb2Mla, tmp1, rl_src1.low_reg, rl_src2.high_reg, tmp1); + NewLIR4(kThumb2AddRRR, res_hi, tmp1, res_hi, 0); + if (special_case) { + FreeTemp(rl_src1.low_reg); + Clobber(rl_src1.low_reg); + Clobber(rl_src1.high_reg); + } + } + FreeTemp(tmp1); + rl_result = GetReturnWide(false); // Just using as a template. + rl_result.low_reg = res_lo; + rl_result.high_reg = res_hi; + StoreValueWide(rl_dest, rl_result); + // Now, restore lr to its non-temp status. + Clobber(rARM_LR); + UnmarkTemp(rARM_LR); +} + +void ArmMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenAddLong for Arm"; +} + +void ArmMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenSubLong for Arm"; +} + +void ArmMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenAndLong for Arm"; +} + +void ArmMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenOrLong for Arm"; +} + +void ArmMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of genXoLong for Arm"; +} + +/* + * Generate array load + */ +void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_dest, int scale) +{ + RegisterClass reg_class = oat_reg_class_by_size(size); + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset; + RegLocation rl_result; + bool constant_index = rl_index.is_const; + rl_array = LoadValue(rl_array, kCoreReg); + if (!constant_index) { + rl_index = LoadValue(rl_index, kCoreReg); + } + + if (rl_dest.wide) { + data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); + } else { + data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); + } + + // If index is constant, just fold it into the data offset + if (constant_index) { + data_offset += mir_graph_->ConstantValue(rl_index) << scale; + } + + /* null object? */ + GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); + + bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); + int reg_len = INVALID_REG; + if (needs_range_check) { + reg_len = AllocTemp(); + /* Get len */ + LoadWordDisp(rl_array.low_reg, len_offset, reg_len); + } + if (rl_dest.wide || rl_dest.fp || constant_index) { + int reg_ptr; + if (constant_index) { + reg_ptr = rl_array.low_reg; // NOTE: must not alter reg_ptr in constant case. 
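+      // The constant index was already folded into data_offset, so the array register
+      // itself serves as the load base; it may be a live (non-temp) register, hence
+      // nothing below may clobber it.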
+ } else { + // No special indexed operation, lea + load w/ displacement + reg_ptr = AllocTemp(); + OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg, + EncodeShift(kArmLsl, scale)); + FreeTemp(rl_index.low_reg); + } + rl_result = EvalLoc(rl_dest, reg_class, true); + + if (needs_range_check) { + if (constant_index) { + GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index), kThrowConstantArrayBounds); + } else { + GenRegRegCheck(kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds); + } + FreeTemp(reg_len); + } + if (rl_dest.wide) { + LoadBaseDispWide(reg_ptr, data_offset, rl_result.low_reg, rl_result.high_reg, INVALID_SREG); + if (!constant_index) { + FreeTemp(reg_ptr); + } + StoreValueWide(rl_dest, rl_result); + } else { + LoadBaseDisp(reg_ptr, data_offset, rl_result.low_reg, size, INVALID_SREG); + if (!constant_index) { + FreeTemp(reg_ptr); + } + StoreValue(rl_dest, rl_result); + } + } else { + // Offset base, then use indexed load + int reg_ptr = AllocTemp(); + OpRegRegImm(kOpAdd, reg_ptr, rl_array.low_reg, data_offset); + FreeTemp(rl_array.low_reg); + rl_result = EvalLoc(rl_dest, reg_class, true); + + if (needs_range_check) { + // TODO: change kCondCS to a more meaningful name, is the sense of + // carry-set/clear flipped? + GenRegRegCheck(kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds); + FreeTemp(reg_len); + } + LoadBaseIndexed(reg_ptr, rl_index.low_reg, rl_result.low_reg, scale, size); + FreeTemp(reg_ptr); + StoreValue(rl_dest, rl_result); + } +} + +/* + * Generate array store + * + */ +void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale) +{ + RegisterClass reg_class = oat_reg_class_by_size(size); + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset; + bool constant_index = rl_index.is_const; + + if (rl_src.wide) { + data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); + } else { + data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); + } + + // If index is constant, just fold it into the data offset. + if (constant_index) { + data_offset += mir_graph_->ConstantValue(rl_index) << scale; + } + + rl_array = LoadValue(rl_array, kCoreReg); + if (!constant_index) { + rl_index = LoadValue(rl_index, kCoreReg); + } + + int reg_ptr; + if (constant_index) { + reg_ptr = rl_array.low_reg; + } else if (IsTemp(rl_array.low_reg)) { + Clobber(rl_array.low_reg); + reg_ptr = rl_array.low_reg; + } else { + reg_ptr = AllocTemp(); + } + + /* null object? */ + GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); + + bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); + int reg_len = INVALID_REG; + if (needs_range_check) { + reg_len = AllocTemp(); + //NOTE: max live temps(4) here. 
+ /* Get len */ + LoadWordDisp(rl_array.low_reg, len_offset, reg_len); + } + /* at this point, reg_ptr points to array, 2 live temps */ + if (rl_src.wide || rl_src.fp || constant_index) { + if (rl_src.wide) { + rl_src = LoadValueWide(rl_src, reg_class); + } else { + rl_src = LoadValue(rl_src, reg_class); + } + if (!constant_index) { + OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg, + EncodeShift(kArmLsl, scale)); + } + if (needs_range_check) { + if (constant_index) { + GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index), kThrowConstantArrayBounds); + } else { + GenRegRegCheck(kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds); + } + FreeTemp(reg_len); + } + + if (rl_src.wide) { + StoreBaseDispWide(reg_ptr, data_offset, rl_src.low_reg, rl_src.high_reg); + } else { + StoreBaseDisp(reg_ptr, data_offset, rl_src.low_reg, size); + } + } else { + /* reg_ptr -> array data */ + OpRegRegImm(kOpAdd, reg_ptr, rl_array.low_reg, data_offset); + rl_src = LoadValue(rl_src, reg_class); + if (needs_range_check) { + GenRegRegCheck(kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds); + FreeTemp(reg_len); + } + StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg, + scale, size); + } + if (!constant_index) { + FreeTemp(reg_ptr); + } +} + +/* + * Generate array store + * + */ +void ArmMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale) +{ + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(); + + FlushAllRegs(); // Use explicit registers + LockCallTemps(); + + int r_value = TargetReg(kArg0); // Register holding value + int r_array_class = TargetReg(kArg1); // Register holding array's Class + int r_array = TargetReg(kArg2); // Register holding array + int r_index = TargetReg(kArg3); // Register holding index into array + + LoadValueDirectFixed(rl_array, r_array); // Grab array + LoadValueDirectFixed(rl_src, r_value); // Grab value + LoadValueDirectFixed(rl_index, r_index); // Grab index + + GenNullCheck(rl_array.s_reg_low, r_array, opt_flags); // NPE? + + // Store of null? + LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL); + + // Get the array's class. + LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class); + CallRuntimeHelperRegReg(ENTRYPOINT_OFFSET(pCanPutArrayElementFromCode), r_value, + r_array_class, true); + // Redo LoadValues in case they didn't survive the call. 
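+  // The helper call above may clobber the fixed argument registers, so kArg0-kArg3 are
+  // repopulated before the store sequence.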
+ LoadValueDirectFixed(rl_array, r_array); // Reload array + LoadValueDirectFixed(rl_index, r_index); // Reload index + LoadValueDirectFixed(rl_src, r_value); // Reload value + r_array_class = INVALID_REG; + + // Branch here if value to be stored == null + LIR* target = NewLIR0(kPseudoTargetLabel); + null_value_check->target = target; + + bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); + int reg_len = INVALID_REG; + if (needs_range_check) { + reg_len = TargetReg(kArg1); + LoadWordDisp(r_array, len_offset, reg_len); // Get len + } + /* r_ptr -> array data */ + int r_ptr = AllocTemp(); + OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset); + if (needs_range_check) { + GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds); + } + StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord); + FreeTemp(r_ptr); + FreeTemp(r_index); + if (!mir_graph_->IsConstantNullRef(rl_src)) { + MarkGCCard(r_value, r_array); + } +} + +void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) +{ + rl_src = LoadValueWide(rl_src, kCoreReg); + // Per spec, we only care about low 6 bits of shift amount. + int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; + if (shift_amount == 0) { + StoreValueWide(rl_dest, rl_src); + return; + } + if (BadOverlap(rl_src, rl_dest)) { + GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); + return; + } + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + switch(opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + if (shift_amount == 1) { + OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src.low_reg, rl_src.low_reg); + OpRegRegReg(kOpAdc, rl_result.high_reg, rl_src.high_reg, rl_src.high_reg); + } else if (shift_amount == 32) { + OpRegCopy(rl_result.high_reg, rl_src.low_reg); + LoadConstant(rl_result.low_reg, 0); + } else if (shift_amount > 31) { + OpRegRegImm(kOpLsl, rl_result.high_reg, rl_src.low_reg, shift_amount - 32); + LoadConstant(rl_result.low_reg, 0); + } else { + OpRegRegImm(kOpLsl, rl_result.high_reg, rl_src.high_reg, shift_amount); + OpRegRegRegShift(kOpOr, rl_result.high_reg, rl_result.high_reg, rl_src.low_reg, + EncodeShift(kArmLsr, 32 - shift_amount)); + OpRegRegImm(kOpLsl, rl_result.low_reg, rl_src.low_reg, shift_amount); + } + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.low_reg, rl_src.high_reg); + OpRegRegImm(kOpAsr, rl_result.high_reg, rl_src.high_reg, 31); + } else if (shift_amount > 31) { + OpRegRegImm(kOpAsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32); + OpRegRegImm(kOpAsr, rl_result.high_reg, rl_src.high_reg, 31); + } else { + int t_reg = AllocTemp(); + OpRegRegImm(kOpLsr, t_reg, rl_src.low_reg, shift_amount); + OpRegRegRegShift(kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg, + EncodeShift(kArmLsl, 32 - shift_amount)); + FreeTemp(t_reg); + OpRegRegImm(kOpAsr, rl_result.high_reg, rl_src.high_reg, shift_amount); + } + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.low_reg, rl_src.high_reg); + LoadConstant(rl_result.high_reg, 0); + } else if (shift_amount > 31) { + OpRegRegImm(kOpLsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32); + LoadConstant(rl_result.high_reg, 0); + } else { + int t_reg = AllocTemp(); + OpRegRegImm(kOpLsr, t_reg, rl_src.low_reg, shift_amount); + OpRegRegRegShift(kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg, + EncodeShift(kArmLsl, 32 - 
shift_amount)); + FreeTemp(t_reg); + OpRegRegImm(kOpLsr, rl_result.high_reg, rl_src.high_reg, shift_amount); + } + break; + default: + LOG(FATAL) << "Unexpected case"; + } + StoreValueWide(rl_dest, rl_result); +} + +void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) +{ + if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) { + if (!rl_src2.is_const) { + // Don't bother with special handling for subtract from immediate. + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + return; + } + } else { + // Normalize + if (!rl_src2.is_const) { + DCHECK(rl_src1.is_const); + RegLocation rl_temp = rl_src1; + rl_src1 = rl_src2; + rl_src2 = rl_temp; + } + } + if (BadOverlap(rl_src1, rl_dest)) { + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + return; + } + DCHECK(rl_src2.is_const); + int64_t val = mir_graph_->ConstantValueWide(rl_src2); + uint32_t val_lo = Low32Bits(val); + uint32_t val_hi = High32Bits(val); + int32_t mod_imm_lo = ModifiedImmediate(val_lo); + int32_t mod_imm_hi = ModifiedImmediate(val_hi); + + // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit + switch(opcode) { + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + case Instruction::SUB_LONG: + case Instruction::SUB_LONG_2ADDR: + if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) { + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + return; + } + break; + default: + break; + } + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + // NOTE: once we've done the EvalLoc on dest, we can no longer bail. + switch (opcode) { + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + NewLIR3(kThumb2AddRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo); + NewLIR3(kThumb2AdcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi); + break; + case Instruction::OR_LONG: + case Instruction::OR_LONG_2ADDR: + if ((val_lo != 0) || (rl_result.low_reg != rl_src1.low_reg)) { + OpRegRegImm(kOpOr, rl_result.low_reg, rl_src1.low_reg, val_lo); + } + if ((val_hi != 0) || (rl_result.high_reg != rl_src1.high_reg)) { + OpRegRegImm(kOpOr, rl_result.high_reg, rl_src1.high_reg, val_hi); + } + break; + case Instruction::XOR_LONG: + case Instruction::XOR_LONG_2ADDR: + OpRegRegImm(kOpXor, rl_result.low_reg, rl_src1.low_reg, val_lo); + OpRegRegImm(kOpXor, rl_result.high_reg, rl_src1.high_reg, val_hi); + break; + case Instruction::AND_LONG: + case Instruction::AND_LONG_2ADDR: + if ((val_lo != 0xffffffff) || (rl_result.low_reg != rl_src1.low_reg)) { + OpRegRegImm(kOpAnd, rl_result.low_reg, rl_src1.low_reg, val_lo); + } + if ((val_hi != 0xffffffff) || (rl_result.high_reg != rl_src1.high_reg)) { + OpRegRegImm(kOpAnd, rl_result.high_reg, rl_src1.high_reg, val_hi); + } + break; + case Instruction::SUB_LONG_2ADDR: + case Instruction::SUB_LONG: + NewLIR3(kThumb2SubRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo); + NewLIR3(kThumb2SbcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi); + break; + default: + LOG(FATAL) << "Unexpected opcode " << opcode; + } + StoreValueWide(rl_dest, rl_result); +} + +} // namespace art diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc new file mode 100644 index 0000000000..4bece136bc --- /dev/null +++ b/compiler/dex/quick/arm/target_arm.cc @@ -0,0 +1,769 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <string> + +#include "arm_lir.h" +#include "codegen_arm.h" +#include "dex/compiler_internals.h" +#include "dex/quick/mir_to_lir-inl.h" + +namespace art { + +static int core_regs[] = {r0, r1, r2, r3, rARM_SUSPEND, r5, r6, r7, r8, rARM_SELF, r10, + r11, r12, rARM_SP, rARM_LR, rARM_PC}; +static int ReservedRegs[] = {rARM_SUSPEND, rARM_SELF, rARM_SP, rARM_LR, rARM_PC}; +static int FpRegs[] = {fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7, + fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15, + fr16, fr17, fr18, fr19, fr20, fr21, fr22, fr23, + fr24, fr25, fr26, fr27, fr28, fr29, fr30, fr31}; +static int core_temps[] = {r0, r1, r2, r3, r12}; +static int fp_temps[] = {fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7, + fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15}; + +RegLocation ArmMir2Lir::LocCReturn() +{ + RegLocation res = ARM_LOC_C_RETURN; + return res; +} + +RegLocation ArmMir2Lir::LocCReturnWide() +{ + RegLocation res = ARM_LOC_C_RETURN_WIDE; + return res; +} + +RegLocation ArmMir2Lir::LocCReturnFloat() +{ + RegLocation res = ARM_LOC_C_RETURN_FLOAT; + return res; +} + +RegLocation ArmMir2Lir::LocCReturnDouble() +{ + RegLocation res = ARM_LOC_C_RETURN_DOUBLE; + return res; +} + +// Return a target-dependent special register. +int ArmMir2Lir::TargetReg(SpecialTargetRegister reg) { + int res = INVALID_REG; + switch (reg) { + case kSelf: res = rARM_SELF; break; + case kSuspend: res = rARM_SUSPEND; break; + case kLr: res = rARM_LR; break; + case kPc: res = rARM_PC; break; + case kSp: res = rARM_SP; break; + case kArg0: res = rARM_ARG0; break; + case kArg1: res = rARM_ARG1; break; + case kArg2: res = rARM_ARG2; break; + case kArg3: res = rARM_ARG3; break; + case kFArg0: res = rARM_FARG0; break; + case kFArg1: res = rARM_FARG1; break; + case kFArg2: res = rARM_FARG2; break; + case kFArg3: res = rARM_FARG3; break; + case kRet0: res = rARM_RET0; break; + case kRet1: res = rARM_RET1; break; + case kInvokeTgt: res = rARM_INVOKE_TGT; break; + case kCount: res = rARM_COUNT; break; + } + return res; +} + + +// Create a double from a pair of singles. +int ArmMir2Lir::S2d(int low_reg, int high_reg) +{ + return ARM_S2D(low_reg, high_reg); +} + +// Return mask to strip off fp reg flags and bias. +uint32_t ArmMir2Lir::FpRegMask() +{ + return ARM_FP_REG_MASK; +} + +// True if both regs single, both core or both double. +bool ArmMir2Lir::SameRegType(int reg1, int reg2) +{ + return (ARM_REGTYPE(reg1) == ARM_REGTYPE(reg2)); +} + +/* + * Decode the register id. + */ +uint64_t ArmMir2Lir::GetRegMaskCommon(int reg) +{ + uint64_t seed; + int shift; + int reg_id; + + + reg_id = reg & 0x1f; + /* Each double register is equal to a pair of single-precision FP registers */ + seed = ARM_DOUBLEREG(reg) ? 3 : 1; + /* FP register starts at bit position 16 */ + shift = ARM_FPREG(reg) ? 
kArmFPReg0 : 0; + /* Expand the double register id into single offset */ + shift += reg_id; + return (seed << shift); +} + +uint64_t ArmMir2Lir::GetPCUseDefEncoding() +{ + return ENCODE_ARM_REG_PC; +} + +void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir) +{ + DCHECK_EQ(cu_->instruction_set, kThumb2); + + // Thumb2 specific setup + uint64_t flags = ArmMir2Lir::EncodingMap[lir->opcode].flags; + int opcode = lir->opcode; + + if (flags & REG_DEF_SP) { + lir->def_mask |= ENCODE_ARM_REG_SP; + } + + if (flags & REG_USE_SP) { + lir->use_mask |= ENCODE_ARM_REG_SP; + } + + if (flags & REG_DEF_LIST0) { + lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + } + + if (flags & REG_DEF_LIST1) { + lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + } + + if (flags & REG_DEF_FPCS_LIST0) { + lir->def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + } + + if (flags & REG_DEF_FPCS_LIST2) { + for (int i = 0; i < lir->operands[2]; i++) { + SetupRegMask(&lir->def_mask, lir->operands[1] + i); + } + } + + if (flags & REG_USE_PC) { + lir->use_mask |= ENCODE_ARM_REG_PC; + } + + /* Conservatively treat the IT block */ + if (flags & IS_IT) { + lir->def_mask = ENCODE_ALL; + } + + if (flags & REG_USE_LIST0) { + lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + } + + if (flags & REG_USE_LIST1) { + lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + } + + if (flags & REG_USE_FPCS_LIST0) { + lir->use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + } + + if (flags & REG_USE_FPCS_LIST2) { + for (int i = 0; i < lir->operands[2]; i++) { + SetupRegMask(&lir->use_mask, lir->operands[1] + i); + } + } + /* Fixup for kThumbPush/lr and kThumbPop/pc */ + if (opcode == kThumbPush || opcode == kThumbPop) { + uint64_t r8Mask = GetRegMaskCommon(r8); + if ((opcode == kThumbPush) && (lir->use_mask & r8Mask)) { + lir->use_mask &= ~r8Mask; + lir->use_mask |= ENCODE_ARM_REG_LR; + } else if ((opcode == kThumbPop) && (lir->def_mask & r8Mask)) { + lir->def_mask &= ~r8Mask; + lir->def_mask |= ENCODE_ARM_REG_PC; + } + } + if (flags & REG_DEF_LR) { + lir->def_mask |= ENCODE_ARM_REG_LR; + } +} + +ArmConditionCode ArmMir2Lir::ArmConditionEncoding(ConditionCode ccode) +{ + ArmConditionCode res; + switch (ccode) { + case kCondEq: res = kArmCondEq; break; + case kCondNe: res = kArmCondNe; break; + case kCondCs: res = kArmCondCs; break; + case kCondCc: res = kArmCondCc; break; + case kCondMi: res = kArmCondMi; break; + case kCondPl: res = kArmCondPl; break; + case kCondVs: res = kArmCondVs; break; + case kCondVc: res = kArmCondVc; break; + case kCondHi: res = kArmCondHi; break; + case kCondLs: res = kArmCondLs; break; + case kCondGe: res = kArmCondGe; break; + case kCondLt: res = kArmCondLt; break; + case kCondGt: res = kArmCondGt; break; + case kCondLe: res = kArmCondLe; break; + case kCondAl: res = kArmCondAl; break; + case kCondNv: res = kArmCondNv; break; + default: + LOG(FATAL) << "Bad condition code " << ccode; + res = static_cast<ArmConditionCode>(0); // Quiet gcc + } + return res; +} + +static const char* core_reg_names[16] = { + "r0", + "r1", + "r2", + "r3", + "r4", + "r5", + "r6", + "r7", + "r8", + "rSELF", + "r10", + "r11", + "r12", + "sp", + "lr", + "pc", +}; + + +static const char* shift_names[4] = { + "lsl", + "lsr", + "asr", + "ror"}; + +/* Decode and print a ARM register name */ +static char* DecodeRegList(int opcode, int vector, char* buf) +{ + int i; + bool printed = false; + buf[0] = 0; + for (i = 0; i < 16; i++, vector >>= 1) { + if (vector & 0x1) { + int reg_id = i; + if (opcode == kThumbPush && i 
== 8) { + reg_id = r14lr; + } else if (opcode == kThumbPop && i == 8) { + reg_id = r15pc; + } + if (printed) { + sprintf(buf + strlen(buf), ", r%d", reg_id); + } else { + printed = true; + sprintf(buf, "r%d", reg_id); + } + } + } + return buf; +} + +static char* DecodeFPCSRegList(int count, int base, char* buf) +{ + sprintf(buf, "s%d", base); + for (int i = 1; i < count; i++) { + sprintf(buf + strlen(buf), ", s%d",base + i); + } + return buf; +} + +static int ExpandImmediate(int value) +{ + int mode = (value & 0xf00) >> 8; + uint32_t bits = value & 0xff; + switch (mode) { + case 0: + return bits; + case 1: + return (bits << 16) | bits; + case 2: + return (bits << 24) | (bits << 8); + case 3: + return (bits << 24) | (bits << 16) | (bits << 8) | bits; + default: + break; + } + bits = (bits | 0x80) << 24; + return bits >> (((value & 0xf80) >> 7) - 8); +} + +const char* cc_names[] = {"eq","ne","cs","cc","mi","pl","vs","vc", + "hi","ls","ge","lt","gt","le","al","nv"}; +/* + * Interpret a format string and build a string no longer than size + * See format key in Assemble.c. + */ +std::string ArmMir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) +{ + std::string buf; + int i; + const char* fmt_end = &fmt[strlen(fmt)]; + char tbuf[256]; + const char* name; + char nc; + while (fmt < fmt_end) { + int operand; + if (*fmt == '!') { + fmt++; + DCHECK_LT(fmt, fmt_end); + nc = *fmt++; + if (nc=='!') { + strcpy(tbuf, "!"); + } else { + DCHECK_LT(fmt, fmt_end); + DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U); + operand = lir->operands[nc-'0']; + switch (*fmt++) { + case 'H': + if (operand != 0) { + sprintf(tbuf, ", %s %d",shift_names[operand & 0x3], operand >> 2); + } else { + strcpy(tbuf,""); + } + break; + case 'B': + switch (operand) { + case kSY: + name = "sy"; + break; + case kST: + name = "st"; + break; + case kISH: + name = "ish"; + break; + case kISHST: + name = "ishst"; + break; + case kNSH: + name = "nsh"; + break; + case kNSHST: + name = "shst"; + break; + default: + name = "DecodeError2"; + break; + } + strcpy(tbuf, name); + break; + case 'b': + strcpy(tbuf,"0000"); + for (i=3; i>= 0; i--) { + tbuf[i] += operand & 1; + operand >>= 1; + } + break; + case 'n': + operand = ~ExpandImmediate(operand); + sprintf(tbuf,"%d [%#x]", operand, operand); + break; + case 'm': + operand = ExpandImmediate(operand); + sprintf(tbuf,"%d [%#x]", operand, operand); + break; + case 's': + sprintf(tbuf,"s%d",operand & ARM_FP_REG_MASK); + break; + case 'S': + sprintf(tbuf,"d%d",(operand & ARM_FP_REG_MASK) >> 1); + break; + case 'h': + sprintf(tbuf,"%04x", operand); + break; + case 'M': + case 'd': + sprintf(tbuf,"%d", operand); + break; + case 'C': + DCHECK_LT(operand, static_cast<int>( + sizeof(core_reg_names)/sizeof(core_reg_names[0]))); + sprintf(tbuf,"%s",core_reg_names[operand]); + break; + case 'E': + sprintf(tbuf,"%d", operand*4); + break; + case 'F': + sprintf(tbuf,"%d", operand*2); + break; + case 'c': + strcpy(tbuf, cc_names[operand]); + break; + case 't': + sprintf(tbuf,"0x%08x (L%p)", + reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + + (operand << 1), + lir->target); + break; + case 'u': { + int offset_1 = lir->operands[0]; + int offset_2 = NEXT_LIR(lir)->operands[0]; + uintptr_t target = + (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & + ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) & + 0xfffffffc; + sprintf(tbuf, "%p", reinterpret_cast<void *>(target)); + break; + } + + /* Nothing to print for BLX_2 */ + case 'v': + strcpy(tbuf, "see above"); + break; 
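+          // 'R' prints a core register list; 'P' and 'Q' print FP single-precision
+          // register lists starting at s16 and s0 respectively.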
+ case 'R': + DecodeRegList(lir->opcode, operand, tbuf); + break; + case 'P': + DecodeFPCSRegList(operand, 16, tbuf); + break; + case 'Q': + DecodeFPCSRegList(operand, 0, tbuf); + break; + default: + strcpy(tbuf,"DecodeError1"); + break; + } + buf += tbuf; + } + } else { + buf += *fmt++; + } + } + return buf; +} + +void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefix) +{ + char buf[256]; + buf[0] = 0; + + if (mask == ENCODE_ALL) { + strcpy(buf, "all"); + } else { + char num[8]; + int i; + + for (i = 0; i < kArmRegEnd; i++) { + if (mask & (1ULL << i)) { + sprintf(num, "%d ", i); + strcat(buf, num); + } + } + + if (mask & ENCODE_CCODE) { + strcat(buf, "cc "); + } + if (mask & ENCODE_FP_STATUS) { + strcat(buf, "fpcc "); + } + + /* Memory bits */ + if (arm_lir && (mask & ENCODE_DALVIK_REG)) { + sprintf(buf + strlen(buf), "dr%d%s", arm_lir->alias_info & 0xffff, + (arm_lir->alias_info & 0x80000000) ? "(+1)" : ""); + } + if (mask & ENCODE_LITERAL) { + strcat(buf, "lit "); + } + + if (mask & ENCODE_HEAP_REF) { + strcat(buf, "heap "); + } + if (mask & ENCODE_MUST_NOT_ALIAS) { + strcat(buf, "noalias "); + } + } + if (buf[0]) { + LOG(INFO) << prefix << ": " << buf; + } +} + +bool ArmMir2Lir::IsUnconditionalBranch(LIR* lir) +{ + return ((lir->opcode == kThumbBUncond) || (lir->opcode == kThumb2BUncond)); +} + +ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) + : Mir2Lir(cu, mir_graph, arena) { + // Sanity check - make sure encoding map lines up. + for (int i = 0; i < kArmLast; i++) { + if (ArmMir2Lir::EncodingMap[i].opcode != i) { + LOG(FATAL) << "Encoding order for " << ArmMir2Lir::EncodingMap[i].name + << " is wrong: expecting " << i << ", seeing " + << static_cast<int>(ArmMir2Lir::EncodingMap[i].opcode); + } + } +} + +Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, + ArenaAllocator* const arena) { + return new ArmMir2Lir(cu, mir_graph, arena); +} + +/* + * Alloc a pair of core registers, or a double. Low reg in low byte, + * high reg in next byte. 
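+ * For example, register numbers 2 and 3 come back packed as 0x0302.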
+ */ +int ArmMir2Lir::AllocTypedTempPair(bool fp_hint, int reg_class) +{ + int high_reg; + int low_reg; + int res = 0; + + if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { + low_reg = AllocTempDouble(); + high_reg = low_reg + 1; + } else { + low_reg = AllocTemp(); + high_reg = AllocTemp(); + } + res = (low_reg & 0xff) | ((high_reg & 0xff) << 8); + return res; +} + +int ArmMir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) +{ + if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) + return AllocTempFloat(); + return AllocTemp(); +} + +void ArmMir2Lir::CompilerInitializeRegAlloc() +{ + int num_regs = sizeof(core_regs)/sizeof(*core_regs); + int num_reserved = sizeof(ReservedRegs)/sizeof(*ReservedRegs); + int num_temps = sizeof(core_temps)/sizeof(*core_temps); + int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs); + int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps); + reg_pool_ = static_cast<RegisterPool*>(arena_->NewMem(sizeof(*reg_pool_), true, + ArenaAllocator::kAllocRegAlloc)); + reg_pool_->num_core_regs = num_regs; + reg_pool_->core_regs = reinterpret_cast<RegisterInfo*> + (arena_->NewMem(num_regs * sizeof(*reg_pool_->core_regs), true, + ArenaAllocator::kAllocRegAlloc)); + reg_pool_->num_fp_regs = num_fp_regs; + reg_pool_->FPRegs = static_cast<RegisterInfo*> + (arena_->NewMem(num_fp_regs * sizeof(*reg_pool_->FPRegs), true, + ArenaAllocator::kAllocRegAlloc)); + CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs); + CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs); + // Keep special registers from being allocated + for (int i = 0; i < num_reserved; i++) { + if (NO_SUSPEND && (ReservedRegs[i] == rARM_SUSPEND)) { + //To measure cost of suspend check + continue; + } + MarkInUse(ReservedRegs[i]); + } + // Mark temp regs - all others not in use can be used for promotion + for (int i = 0; i < num_temps; i++) { + MarkTemp(core_temps[i]); + } + for (int i = 0; i < num_fp_temps; i++) { + MarkTemp(fp_temps[i]); + } + + // Start allocation at r2 in an attempt to avoid clobbering return values + reg_pool_->next_core_reg = r2; +} + +void ArmMir2Lir::FreeRegLocTemps(RegLocation rl_keep, + RegLocation rl_free) +{ + if ((rl_free.low_reg != rl_keep.low_reg) && (rl_free.low_reg != rl_keep.high_reg) && + (rl_free.high_reg != rl_keep.low_reg) && (rl_free.high_reg != rl_keep.high_reg)) { + // No overlap, free both + FreeTemp(rl_free.low_reg); + FreeTemp(rl_free.high_reg); + } +} +/* + * TUNING: is true leaf? Can't just use METHOD_IS_LEAF to determine as some + * instructions might call out to C/assembly helper functions. Until + * machinery is in place, always spill lr. + */ + +void ArmMir2Lir::AdjustSpillMask() +{ + core_spill_mask_ |= (1 << rARM_LR); + num_core_spills_++; +} + +/* + * Mark a callee-save fp register as promoted. Note that + * vpush/vpop uses contiguous register lists so we must + * include any holes in the mask. Associate holes with + * Dalvik register INVALID_VREG (0xFFFFU). 
+ */ +void ArmMir2Lir::MarkPreservedSingle(int v_reg, int reg) +{ + DCHECK_GE(reg, ARM_FP_REG_MASK + ARM_FP_CALLEE_SAVE_BASE); + reg = (reg & ARM_FP_REG_MASK) - ARM_FP_CALLEE_SAVE_BASE; + // Ensure fp_vmap_table is large enough + int table_size = fp_vmap_table_.size(); + for (int i = table_size; i < (reg + 1); i++) { + fp_vmap_table_.push_back(INVALID_VREG); + } + // Add the current mapping + fp_vmap_table_[reg] = v_reg; + // Size of fp_vmap_table is high-water mark, use to set mask + num_fp_spills_ = fp_vmap_table_.size(); + fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << ARM_FP_CALLEE_SAVE_BASE; +} + +void ArmMir2Lir::FlushRegWide(int reg1, int reg2) +{ + RegisterInfo* info1 = GetRegInfo(reg1); + RegisterInfo* info2 = GetRegInfo(reg2); + DCHECK(info1 && info2 && info1->pair && info2->pair && + (info1->partner == info2->reg) && + (info2->partner == info1->reg)); + if ((info1->live && info1->dirty) || (info2->live && info2->dirty)) { + if (!(info1->is_temp && info2->is_temp)) { + /* Should not happen. If it does, there's a problem in eval_loc */ + LOG(FATAL) << "Long half-temp, half-promoted"; + } + + info1->dirty = false; + info2->dirty = false; + if (mir_graph_->SRegToVReg(info2->s_reg) < + mir_graph_->SRegToVReg(info1->s_reg)) + info1 = info2; + int v_reg = mir_graph_->SRegToVReg(info1->s_reg); + StoreBaseDispWide(rARM_SP, VRegOffset(v_reg), info1->reg, info1->partner); + } +} + +void ArmMir2Lir::FlushReg(int reg) +{ + RegisterInfo* info = GetRegInfo(reg); + if (info->live && info->dirty) { + info->dirty = false; + int v_reg = mir_graph_->SRegToVReg(info->s_reg); + StoreBaseDisp(rARM_SP, VRegOffset(v_reg), reg, kWord); + } +} + +/* Give access to the target-dependent FP register encoding to common code */ +bool ArmMir2Lir::IsFpReg(int reg) { + return ARM_FPREG(reg); +} + +/* Clobber all regs that might be used by an external C call */ +void ArmMir2Lir::ClobberCalleeSave() +{ + Clobber(r0); + Clobber(r1); + Clobber(r2); + Clobber(r3); + Clobber(r12); + Clobber(r14lr); + Clobber(fr0); + Clobber(fr1); + Clobber(fr2); + Clobber(fr3); + Clobber(fr4); + Clobber(fr5); + Clobber(fr6); + Clobber(fr7); + Clobber(fr8); + Clobber(fr9); + Clobber(fr10); + Clobber(fr11); + Clobber(fr12); + Clobber(fr13); + Clobber(fr14); + Clobber(fr15); +} + +RegLocation ArmMir2Lir::GetReturnWideAlt() +{ + RegLocation res = LocCReturnWide(); + res.low_reg = r2; + res.high_reg = r3; + Clobber(r2); + Clobber(r3); + MarkInUse(r2); + MarkInUse(r3); + MarkPair(res.low_reg, res.high_reg); + return res; +} + +RegLocation ArmMir2Lir::GetReturnAlt() +{ + RegLocation res = LocCReturn(); + res.low_reg = r1; + Clobber(r1); + MarkInUse(r1); + return res; +} + +ArmMir2Lir::RegisterInfo* ArmMir2Lir::GetRegInfo(int reg) +{ + return ARM_FPREG(reg) ? 
&reg_pool_->FPRegs[reg & ARM_FP_REG_MASK] + : &reg_pool_->core_regs[reg]; +} + +/* To be used when explicitly managing register use */ +void ArmMir2Lir::LockCallTemps() +{ + LockTemp(r0); + LockTemp(r1); + LockTemp(r2); + LockTemp(r3); +} + +/* To be used when explicitly managing register use */ +void ArmMir2Lir::FreeCallTemps() +{ + FreeTemp(r0); + FreeTemp(r1); + FreeTemp(r2); + FreeTemp(r3); +} + +int ArmMir2Lir::LoadHelper(int offset) +{ + LoadWordDisp(rARM_SELF, offset, rARM_LR); + return rARM_LR; +} + +uint64_t ArmMir2Lir::GetTargetInstFlags(int opcode) +{ + return ArmMir2Lir::EncodingMap[opcode].flags; +} + +const char* ArmMir2Lir::GetTargetInstName(int opcode) +{ + return ArmMir2Lir::EncodingMap[opcode].name; +} + +const char* ArmMir2Lir::GetTargetInstFmt(int opcode) +{ + return ArmMir2Lir::EncodingMap[opcode].fmt; +} + +} // namespace art diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc new file mode 100644 index 0000000000..abf921f8ad --- /dev/null +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -0,0 +1,1093 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_lir.h" +#include "codegen_arm.h" +#include "dex/quick/mir_to_lir-inl.h" + +namespace art { + +/* This file contains codegen for the Thumb ISA. */ + +static int EncodeImmSingle(int value) +{ + int res; + int bit_a = (value & 0x80000000) >> 31; + int not_bit_b = (value & 0x40000000) >> 30; + int bit_b = (value & 0x20000000) >> 29; + int b_smear = (value & 0x3e000000) >> 25; + int slice = (value & 0x01f80000) >> 19; + int zeroes = (value & 0x0007ffff); + if (zeroes != 0) + return -1; + if (bit_b) { + if ((not_bit_b != 0) || (b_smear != 0x1f)) + return -1; + } else { + if ((not_bit_b != 1) || (b_smear != 0x0)) + return -1; + } + res = (bit_a << 7) | (bit_b << 6) | slice; + return res; +} + +/* + * Determine whether value can be encoded as a Thumb2 floating point + * immediate. If not, return -1. If so return encoded 8-bit value. + */ +static int EncodeImmDouble(int64_t value) +{ + int res; + int bit_a = (value & 0x8000000000000000ll) >> 63; + int not_bit_b = (value & 0x4000000000000000ll) >> 62; + int bit_b = (value & 0x2000000000000000ll) >> 61; + int b_smear = (value & 0x3fc0000000000000ll) >> 54; + int slice = (value & 0x003f000000000000ll) >> 48; + uint64_t zeroes = (value & 0x0000ffffffffffffll); + if (zeroes != 0) + return -1; + if (bit_b) { + if ((not_bit_b != 0) || (b_smear != 0xff)) + return -1; + } else { + if ((not_bit_b != 1) || (b_smear != 0x0)) + return -1; + } + res = (bit_a << 7) | (bit_b << 6) | slice; + return res; +} + +LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) +{ + DCHECK(ARM_SINGLEREG(r_dest)); + if (value == 0) { + // TODO: we need better info about the target CPU. a vector exclusive or + // would probably be better here if we could rely on its existence.
+ // Load an immediate +2.0 (which encodes to 0) + NewLIR2(kThumb2Vmovs_IMM8, r_dest, 0); + // +0.0 = +2.0 - +2.0 + return NewLIR3(kThumb2Vsubs, r_dest, r_dest, r_dest); + } else { + int encoded_imm = EncodeImmSingle(value); + if (encoded_imm >= 0) { + return NewLIR2(kThumb2Vmovs_IMM8, r_dest, encoded_imm); + } + } + LIR* data_target = ScanLiteralPool(literal_list_, value, 0); + if (data_target == NULL) { + data_target = AddWordData(&literal_list_, value); + } + LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs, + r_dest, r15pc, 0, 0, 0, data_target); + SetMemRefType(load_pc_rel, true, kLiteral); + load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target); + AppendLIR(load_pc_rel); + return load_pc_rel; +} + +static int LeadingZeros(uint32_t val) +{ + uint32_t alt; + int n; + int count; + + count = 16; + n = 32; + do { + alt = val >> count; + if (alt != 0) { + n = n - count; + val = alt; + } + count >>= 1; + } while (count); + return n - val; +} + +/* + * Determine whether value can be encoded as a Thumb2 modified + * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form. + */ +int ArmMir2Lir::ModifiedImmediate(uint32_t value) +{ + int z_leading; + int z_trailing; + uint32_t b0 = value & 0xff; + + /* Note: case of value==0 must use 0:000:0:0000000 encoding */ + if (value <= 0xFF) + return b0; // 0:000:a:bcdefgh + if (value == ((b0 << 16) | b0)) + return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */ + if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0)) + return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */ + b0 = (value >> 8) & 0xff; + if (value == ((b0 << 24) | (b0 << 8))) + return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */ + /* Can we do it with rotation? */ + z_leading = LeadingZeros(value); + z_trailing = 32 - LeadingZeros(~value & (value - 1)); + /* A run of eight or fewer active bits? */ + if ((z_leading + z_trailing) < 24) + return -1; /* No - bail */ + /* left-justify the constant, discarding msb (known to be 1) */ + value <<= z_leading + 1; + /* Create bcdefgh */ + value >>= 25; + /* Put it all together */ + return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */ +} + +bool ArmMir2Lir::InexpensiveConstantInt(int32_t value) +{ + return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0); +} + +bool ArmMir2Lir::InexpensiveConstantFloat(int32_t value) +{ + return EncodeImmSingle(value) >= 0; +} + +bool ArmMir2Lir::InexpensiveConstantLong(int64_t value) +{ + return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value)); +} + +bool ArmMir2Lir::InexpensiveConstantDouble(int64_t value) +{ + return EncodeImmDouble(value) >= 0; +} + +/* + * Load a immediate using a shortcut if possible; otherwise + * grab from the per-translation literal pool. + * + * No additional register clobbering operation performed. 
Use this version when + * 1) r_dest is freshly returned from AllocTemp or + * 2) The codegen is under fixed register usage + */ +LIR* ArmMir2Lir::LoadConstantNoClobber(int r_dest, int value) +{ + LIR* res; + int mod_imm; + + if (ARM_FPREG(r_dest)) { + return LoadFPConstantValue(r_dest, value); + } + + /* See if the value can be constructed cheaply */ + if (ARM_LOWREG(r_dest) && (value >= 0) && (value <= 255)) { + return NewLIR2(kThumbMovImm, r_dest, value); + } + /* Check Modified immediate special cases */ + mod_imm = ModifiedImmediate(value); + if (mod_imm >= 0) { + res = NewLIR2(kThumb2MovImmShift, r_dest, mod_imm); + return res; + } + mod_imm = ModifiedImmediate(~value); + if (mod_imm >= 0) { + res = NewLIR2(kThumb2MvnImm12, r_dest, mod_imm); + return res; + } + /* 16-bit immediate? */ + if ((value & 0xffff) == value) { + res = NewLIR2(kThumb2MovImm16, r_dest, value); + return res; + } + /* Do a low/high pair */ + res = NewLIR2(kThumb2MovImm16, r_dest, Low16Bits(value)); + NewLIR2(kThumb2MovImm16H, r_dest, High16Bits(value)); + return res; +} + +LIR* ArmMir2Lir::OpUnconditionalBranch(LIR* target) +{ + LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched during assembly*/); + res->target = target; + return res; +} + +LIR* ArmMir2Lir::OpCondBranch(ConditionCode cc, LIR* target) +{ + LIR* branch = NewLIR2(kThumb2BCond, 0 /* offset to be patched */, + ArmConditionEncoding(cc)); + branch->target = target; + return branch; +} + +LIR* ArmMir2Lir::OpReg(OpKind op, int r_dest_src) +{ + ArmOpcode opcode = kThumbBkpt; + switch (op) { + case kOpBlx: + opcode = kThumbBlxR; + break; + default: + LOG(FATAL) << "Bad opcode " << op; + } + return NewLIR1(opcode, r_dest_src); +} + +LIR* ArmMir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, + int shift) +{ + bool thumb_form = ((shift == 0) && ARM_LOWREG(r_dest_src1) && ARM_LOWREG(r_src2)); + ArmOpcode opcode = kThumbBkpt; + switch (op) { + case kOpAdc: + opcode = (thumb_form) ? kThumbAdcRR : kThumb2AdcRRR; + break; + case kOpAnd: + opcode = (thumb_form) ? kThumbAndRR : kThumb2AndRRR; + break; + case kOpBic: + opcode = (thumb_form) ? kThumbBicRR : kThumb2BicRRR; + break; + case kOpCmn: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbCmnRR : kThumb2CmnRR; + break; + case kOpCmp: + if (thumb_form) + opcode = kThumbCmpRR; + else if ((shift == 0) && !ARM_LOWREG(r_dest_src1) && !ARM_LOWREG(r_src2)) + opcode = kThumbCmpHH; + else if ((shift == 0) && ARM_LOWREG(r_dest_src1)) + opcode = kThumbCmpLH; + else if (shift == 0) + opcode = kThumbCmpHL; + else + opcode = kThumb2CmpRR; + break; + case kOpXor: + opcode = (thumb_form) ? kThumbEorRR : kThumb2EorRRR; + break; + case kOpMov: + DCHECK_EQ(shift, 0); + if (ARM_LOWREG(r_dest_src1) && ARM_LOWREG(r_src2)) + opcode = kThumbMovRR; + else if (!ARM_LOWREG(r_dest_src1) && !ARM_LOWREG(r_src2)) + opcode = kThumbMovRR_H2H; + else if (ARM_LOWREG(r_dest_src1)) + opcode = kThumbMovRR_H2L; + else + opcode = kThumbMovRR_L2H; + break; + case kOpMul: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbMul : kThumb2MulRRR; + break; + case kOpMvn: + opcode = (thumb_form) ? kThumbMvn : kThumb2MnvRR; + break; + case kOpNeg: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbNeg : kThumb2NegRR; + break; + case kOpOr: + opcode = (thumb_form) ? kThumbOrr : kThumb2OrrRRR; + break; + case kOpSbc: + opcode = (thumb_form) ? kThumbSbc : kThumb2SbcRRR; + break; + case kOpTst: + opcode = (thumb_form) ? kThumbTst : kThumb2TstRR; + break; + case kOpLsl: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? 
kThumbLslRR : kThumb2LslRRR; + break; + case kOpLsr: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbLsrRR : kThumb2LsrRRR; + break; + case kOpAsr: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbAsrRR : kThumb2AsrRRR; + break; + case kOpRor: + DCHECK_EQ(shift, 0); + opcode = (thumb_form) ? kThumbRorRR : kThumb2RorRRR; + break; + case kOpAdd: + opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR; + break; + case kOpSub: + opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR; + break; + case kOp2Byte: + DCHECK_EQ(shift, 0); + return NewLIR4(kThumb2Sbfx, r_dest_src1, r_src2, 0, 8); + case kOp2Short: + DCHECK_EQ(shift, 0); + return NewLIR4(kThumb2Sbfx, r_dest_src1, r_src2, 0, 16); + case kOp2Char: + DCHECK_EQ(shift, 0); + return NewLIR4(kThumb2Ubfx, r_dest_src1, r_src2, 0, 16); + default: + LOG(FATAL) << "Bad opcode: " << op; + break; + } + DCHECK_GE(static_cast<int>(opcode), 0); + if (EncodingMap[opcode].flags & IS_BINARY_OP) + return NewLIR2(opcode, r_dest_src1, r_src2); + else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { + if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) + return NewLIR3(opcode, r_dest_src1, r_src2, shift); + else + return NewLIR3(opcode, r_dest_src1, r_dest_src1, r_src2); + } else if (EncodingMap[opcode].flags & IS_QUAD_OP) + return NewLIR4(opcode, r_dest_src1, r_dest_src1, r_src2, shift); + else { + LOG(FATAL) << "Unexpected encoding operand count"; + return NULL; + } +} + +LIR* ArmMir2Lir::OpRegReg(OpKind op, int r_dest_src1, int r_src2) +{ + return OpRegRegShift(op, r_dest_src1, r_src2, 0); +} + +LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1, + int r_src2, int shift) +{ + ArmOpcode opcode = kThumbBkpt; + bool thumb_form = (shift == 0) && ARM_LOWREG(r_dest) && ARM_LOWREG(r_src1) && + ARM_LOWREG(r_src2); + switch (op) { + case kOpAdd: + opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR; + break; + case kOpSub: + opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR; + break; + case kOpRsub: + opcode = kThumb2RsubRRR; + break; + case kOpAdc: + opcode = kThumb2AdcRRR; + break; + case kOpAnd: + opcode = kThumb2AndRRR; + break; + case kOpBic: + opcode = kThumb2BicRRR; + break; + case kOpXor: + opcode = kThumb2EorRRR; + break; + case kOpMul: + DCHECK_EQ(shift, 0); + opcode = kThumb2MulRRR; + break; + case kOpOr: + opcode = kThumb2OrrRRR; + break; + case kOpSbc: + opcode = kThumb2SbcRRR; + break; + case kOpLsl: + DCHECK_EQ(shift, 0); + opcode = kThumb2LslRRR; + break; + case kOpLsr: + DCHECK_EQ(shift, 0); + opcode = kThumb2LsrRRR; + break; + case kOpAsr: + DCHECK_EQ(shift, 0); + opcode = kThumb2AsrRRR; + break; + case kOpRor: + DCHECK_EQ(shift, 0); + opcode = kThumb2RorRRR; + break; + default: + LOG(FATAL) << "Bad opcode: " << op; + break; + } + DCHECK_GE(static_cast<int>(opcode), 0); + if (EncodingMap[opcode].flags & IS_QUAD_OP) + return NewLIR4(opcode, r_dest, r_src1, r_src2, shift); + else { + DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP); + return NewLIR3(opcode, r_dest, r_src1, r_src2); + } +} + +LIR* ArmMir2Lir::OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2) +{ + return OpRegRegRegShift(op, r_dest, r_src1, r_src2, 0); +} + +LIR* ArmMir2Lir::OpRegRegImm(OpKind op, int r_dest, int r_src1, int value) +{ + LIR* res; + bool neg = (value < 0); + int abs_value = (neg) ? 
-value : value; + ArmOpcode opcode = kThumbBkpt; + ArmOpcode alt_opcode = kThumbBkpt; + bool all_low_regs = (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src1)); + int mod_imm = ModifiedImmediate(value); + int mod_imm_neg = ModifiedImmediate(-value); + + switch (op) { + case kOpLsl: + if (all_low_regs) + return NewLIR3(kThumbLslRRI5, r_dest, r_src1, value); + else + return NewLIR3(kThumb2LslRRI5, r_dest, r_src1, value); + case kOpLsr: + if (all_low_regs) + return NewLIR3(kThumbLsrRRI5, r_dest, r_src1, value); + else + return NewLIR3(kThumb2LsrRRI5, r_dest, r_src1, value); + case kOpAsr: + if (all_low_regs) + return NewLIR3(kThumbAsrRRI5, r_dest, r_src1, value); + else + return NewLIR3(kThumb2AsrRRI5, r_dest, r_src1, value); + case kOpRor: + return NewLIR3(kThumb2RorRRI5, r_dest, r_src1, value); + case kOpAdd: + if (ARM_LOWREG(r_dest) && (r_src1 == r13sp) && + (value <= 1020) && ((value & 0x3)==0)) { + return NewLIR3(kThumbAddSpRel, r_dest, r_src1, value >> 2); + } else if (ARM_LOWREG(r_dest) && (r_src1 == r15pc) && + (value <= 1020) && ((value & 0x3)==0)) { + return NewLIR3(kThumbAddPcRel, r_dest, r_src1, value >> 2); + } + // Note: intentional fallthrough + case kOpSub: + if (all_low_regs && ((abs_value & 0x7) == abs_value)) { + if (op == kOpAdd) + opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3; + else + opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3; + return NewLIR3(opcode, r_dest, r_src1, abs_value); + } else if ((abs_value & 0xff) == abs_value) { + if (op == kOpAdd) + opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12; + else + opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12; + return NewLIR3(opcode, r_dest, r_src1, abs_value); + } + if (mod_imm_neg >= 0) { + op = (op == kOpAdd) ? kOpSub : kOpAdd; + mod_imm = mod_imm_neg; + } + if (op == kOpSub) { + opcode = kThumb2SubRRI8; + alt_opcode = kThumb2SubRRR; + } else { + opcode = kThumb2AddRRI8; + alt_opcode = kThumb2AddRRR; + } + break; + case kOpRsub: + opcode = kThumb2RsubRRI8; + alt_opcode = kThumb2RsubRRR; + break; + case kOpAdc: + opcode = kThumb2AdcRRI8; + alt_opcode = kThumb2AdcRRR; + break; + case kOpSbc: + opcode = kThumb2SbcRRI8; + alt_opcode = kThumb2SbcRRR; + break; + case kOpOr: + opcode = kThumb2OrrRRI8; + alt_opcode = kThumb2OrrRRR; + break; + case kOpAnd: + opcode = kThumb2AndRRI8; + alt_opcode = kThumb2AndRRR; + break; + case kOpXor: + opcode = kThumb2EorRRI8; + alt_opcode = kThumb2EorRRR; + break; + case kOpMul: + //TUNING: power of 2, shift & add + mod_imm = -1; + alt_opcode = kThumb2MulRRR; + break; + case kOpCmp: { + int mod_imm = ModifiedImmediate(value); + LIR* res; + if (mod_imm >= 0) { + res = NewLIR2(kThumb2CmpRI12, r_src1, mod_imm); + } else { + int r_tmp = AllocTemp(); + res = LoadConstant(r_tmp, value); + OpRegReg(kOpCmp, r_src1, r_tmp); + FreeTemp(r_tmp); + } + return res; + } + default: + LOG(FATAL) << "Bad opcode: " << op; + } + + if (mod_imm >= 0) { + return NewLIR3(opcode, r_dest, r_src1, mod_imm); + } else { + int r_scratch = AllocTemp(); + LoadConstant(r_scratch, value); + if (EncodingMap[alt_opcode].flags & IS_QUAD_OP) + res = NewLIR4(alt_opcode, r_dest, r_src1, r_scratch, 0); + else + res = NewLIR3(alt_opcode, r_dest, r_src1, r_scratch); + FreeTemp(r_scratch); + return res; + } +} + +/* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */ +LIR* ArmMir2Lir::OpRegImm(OpKind op, int r_dest_src1, int value) +{ + bool neg = (value < 0); + int abs_value = (neg) ? 
-value : value; + bool short_form = (((abs_value & 0xff) == abs_value) && ARM_LOWREG(r_dest_src1)); + ArmOpcode opcode = kThumbBkpt; + switch (op) { + case kOpAdd: + if ( !neg && (r_dest_src1 == r13sp) && (value <= 508)) { /* sp */ + DCHECK_EQ((value & 0x3), 0); + return NewLIR1(kThumbAddSpI7, value >> 2); + } else if (short_form) { + opcode = (neg) ? kThumbSubRI8 : kThumbAddRI8; + } + break; + case kOpSub: + if (!neg && (r_dest_src1 == r13sp) && (value <= 508)) { /* sp */ + DCHECK_EQ((value & 0x3), 0); + return NewLIR1(kThumbSubSpI7, value >> 2); + } else if (short_form) { + opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8; + } + break; + case kOpCmp: + if (ARM_LOWREG(r_dest_src1) && short_form) + opcode = (short_form) ? kThumbCmpRI8 : kThumbCmpRR; + else if (ARM_LOWREG(r_dest_src1)) + opcode = kThumbCmpRR; + else { + short_form = false; + opcode = kThumbCmpHL; + } + break; + default: + /* Punt to OpRegRegImm - if bad case catch it there */ + short_form = false; + break; + } + if (short_form) + return NewLIR2(opcode, r_dest_src1, abs_value); + else { + return OpRegRegImm(op, r_dest_src1, r_dest_src1, value); + } +} + +LIR* ArmMir2Lir::LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value) +{ + LIR* res = NULL; + int32_t val_lo = Low32Bits(value); + int32_t val_hi = High32Bits(value); + int target_reg = S2d(r_dest_lo, r_dest_hi); + if (ARM_FPREG(r_dest_lo)) { + if ((val_lo == 0) && (val_hi == 0)) { + // TODO: we need better info about the target CPU. a vector exclusive or + // would probably be better here if we could rely on its existance. + // Load an immediate +2.0 (which encodes to 0) + NewLIR2(kThumb2Vmovd_IMM8, target_reg, 0); + // +0.0 = +2.0 - +2.0 + res = NewLIR3(kThumb2Vsubd, target_reg, target_reg, target_reg); + } else { + int encoded_imm = EncodeImmDouble(value); + if (encoded_imm >= 0) { + res = NewLIR2(kThumb2Vmovd_IMM8, target_reg, encoded_imm); + } + } + } else { + if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) { + res = LoadConstantNoClobber(r_dest_lo, val_lo); + LoadConstantNoClobber(r_dest_hi, val_hi); + } + } + if (res == NULL) { + // No short form - load from the literal pool. 
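+ // ScanLiteralPoolWide() reuses a matching lo/hi pair already in the pool; otherwise + // AddWideData() adds both words. The pc-relative load built below carries a zero + // displacement plus a pointer to the literal, and gets its real offset once literal + // offsets are assigned during assembly (see AssembleLIR()).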
+ LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); + if (data_target == NULL) { + data_target = AddWideData(&literal_list_, val_lo, val_hi); + } + if (ARM_FPREG(r_dest_lo)) { + res = RawLIR(current_dalvik_offset_, kThumb2Vldrd, + target_reg, r15pc, 0, 0, 0, data_target); + } else { + res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8, + r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target); + } + SetMemRefType(res, true, kLiteral); + res->alias_info = reinterpret_cast<uintptr_t>(data_target); + AppendLIR(res); + } + return res; +} + +int ArmMir2Lir::EncodeShift(int code, int amount) { + return ((amount & 0x1f) << 2) | code; +} + +LIR* ArmMir2Lir::LoadBaseIndexed(int rBase, int r_index, int r_dest, + int scale, OpSize size) +{ + bool all_low_regs = ARM_LOWREG(rBase) && ARM_LOWREG(r_index) && ARM_LOWREG(r_dest); + LIR* load; + ArmOpcode opcode = kThumbBkpt; + bool thumb_form = (all_low_regs && (scale == 0)); + int reg_ptr; + + if (ARM_FPREG(r_dest)) { + if (ARM_SINGLEREG(r_dest)) { + DCHECK((size == kWord) || (size == kSingle)); + opcode = kThumb2Vldrs; + size = kSingle; + } else { + DCHECK(ARM_DOUBLEREG(r_dest)); + DCHECK((size == kLong) || (size == kDouble)); + DCHECK_EQ((r_dest & 0x1), 0); + opcode = kThumb2Vldrd; + size = kDouble; + } + } else { + if (size == kSingle) + size = kWord; + } + + switch (size) { + case kDouble: // fall-through + case kSingle: + reg_ptr = AllocTemp(); + if (scale) { + NewLIR4(kThumb2AddRRR, reg_ptr, rBase, r_index, + EncodeShift(kArmLsl, scale)); + } else { + OpRegRegReg(kOpAdd, reg_ptr, rBase, r_index); + } + load = NewLIR3(opcode, r_dest, reg_ptr, 0); + FreeTemp(reg_ptr); + return load; + case kWord: + opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR; + break; + case kUnsignedHalf: + opcode = (thumb_form) ? kThumbLdrhRRR : kThumb2LdrhRRR; + break; + case kSignedHalf: + opcode = (thumb_form) ? kThumbLdrshRRR : kThumb2LdrshRRR; + break; + case kUnsignedByte: + opcode = (thumb_form) ? kThumbLdrbRRR : kThumb2LdrbRRR; + break; + case kSignedByte: + opcode = (thumb_form) ? kThumbLdrsbRRR : kThumb2LdrsbRRR; + break; + default: + LOG(FATAL) << "Bad size: " << size; + } + if (thumb_form) + load = NewLIR3(opcode, r_dest, rBase, r_index); + else + load = NewLIR4(opcode, r_dest, rBase, r_index, scale); + + return load; +} + +LIR* ArmMir2Lir::StoreBaseIndexed(int rBase, int r_index, int r_src, + int scale, OpSize size) +{ + bool all_low_regs = ARM_LOWREG(rBase) && ARM_LOWREG(r_index) && ARM_LOWREG(r_src); + LIR* store = NULL; + ArmOpcode opcode = kThumbBkpt; + bool thumb_form = (all_low_regs && (scale == 0)); + int reg_ptr; + + if (ARM_FPREG(r_src)) { + if (ARM_SINGLEREG(r_src)) { + DCHECK((size == kWord) || (size == kSingle)); + opcode = kThumb2Vstrs; + size = kSingle; + } else { + DCHECK(ARM_DOUBLEREG(r_src)); + DCHECK((size == kLong) || (size == kDouble)); + DCHECK_EQ((r_src & 0x1), 0); + opcode = kThumb2Vstrd; + size = kDouble; + } + } else { + if (size == kSingle) + size = kWord; + } + + switch (size) { + case kDouble: // fall-through + case kSingle: + reg_ptr = AllocTemp(); + if (scale) { + NewLIR4(kThumb2AddRRR, reg_ptr, rBase, r_index, + EncodeShift(kArmLsl, scale)); + } else { + OpRegRegReg(kOpAdd, reg_ptr, rBase, r_index); + } + store = NewLIR3(opcode, r_src, reg_ptr, 0); + FreeTemp(reg_ptr); + return store; + case kWord: + opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR; + break; + case kUnsignedHalf: + case kSignedHalf: + opcode = (thumb_form) ? 
kThumbStrhRRR : kThumb2StrhRRR; + break; + case kUnsignedByte: + case kSignedByte: + opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR; + break; + default: + LOG(FATAL) << "Bad size: " << size; + } + if (thumb_form) + store = NewLIR3(opcode, r_src, rBase, r_index); + else + store = NewLIR4(opcode, r_src, rBase, r_index, scale); + + return store; +} + +/* + * Load value from base + displacement. Optionally perform null check + * on base (which must have an associated s_reg and MIR). If not + * performing null check, incoming MIR can be null. + */ +LIR* ArmMir2Lir::LoadBaseDispBody(int rBase, int displacement, int r_dest, + int r_dest_hi, OpSize size, int s_reg) +{ + LIR* load = NULL; + ArmOpcode opcode = kThumbBkpt; + bool short_form = false; + bool thumb2Form = (displacement < 4092 && displacement >= 0); + bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_dest)); + int encoded_disp = displacement; + bool is64bit = false; + bool already_generated = false; + switch (size) { + case kDouble: + case kLong: + is64bit = true; + if (ARM_FPREG(r_dest)) { + if (ARM_SINGLEREG(r_dest)) { + DCHECK(ARM_FPREG(r_dest_hi)); + r_dest = S2d(r_dest, r_dest_hi); + } + opcode = kThumb2Vldrd; + if (displacement <= 1020) { + short_form = true; + encoded_disp >>= 2; + } + break; + } else { + if (displacement <= 1020) { + load = NewLIR4(kThumb2LdrdI8, r_dest, r_dest_hi, rBase, displacement >> 2); + } else { + load = LoadBaseDispBody(rBase, displacement, r_dest, + -1, kWord, s_reg); + LoadBaseDispBody(rBase, displacement + 4, r_dest_hi, + -1, kWord, INVALID_SREG); + } + already_generated = true; + } + case kSingle: + case kWord: + if (ARM_FPREG(r_dest)) { + opcode = kThumb2Vldrs; + if (displacement <= 1020) { + short_form = true; + encoded_disp >>= 2; + } + break; + } + if (ARM_LOWREG(r_dest) && (rBase == r15pc) && + (displacement <= 1020) && (displacement >= 0)) { + short_form = true; + encoded_disp >>= 2; + opcode = kThumbLdrPcRel; + } else if (ARM_LOWREG(r_dest) && (rBase == r13sp) && + (displacement <= 1020) && (displacement >= 0)) { + short_form = true; + encoded_disp >>= 2; + opcode = kThumbLdrSpRel; + } else if (all_low_regs && displacement < 128 && displacement >= 0) { + DCHECK_EQ((displacement & 0x3), 0); + short_form = true; + encoded_disp >>= 2; + opcode = kThumbLdrRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2LdrRRI12; + } + break; + case kUnsignedHalf: + if (all_low_regs && displacement < 64 && displacement >= 0) { + DCHECK_EQ((displacement & 0x1), 0); + short_form = true; + encoded_disp >>= 1; + opcode = kThumbLdrhRRI5; + } else if (displacement < 4092 && displacement >= 0) { + short_form = true; + opcode = kThumb2LdrhRRI12; + } + break; + case kSignedHalf: + if (thumb2Form) { + short_form = true; + opcode = kThumb2LdrshRRI12; + } + break; + case kUnsignedByte: + if (all_low_regs && displacement < 32 && displacement >= 0) { + short_form = true; + opcode = kThumbLdrbRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2LdrbRRI12; + } + break; + case kSignedByte: + if (thumb2Form) { + short_form = true; + opcode = kThumb2LdrsbRRI12; + } + break; + default: + LOG(FATAL) << "Bad size: " << size; + } + + if (!already_generated) { + if (short_form) { + load = NewLIR3(opcode, r_dest, rBase, encoded_disp); + } else { + int reg_offset = AllocTemp(); + LoadConstant(reg_offset, encoded_disp); + load = LoadBaseIndexed(rBase, reg_offset, r_dest, 0, size); + FreeTemp(reg_offset); + } + } + + // TODO: in future may need to differentiate Dalvik accesses w/ spills + 
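+ // Loads relative to the stack frame (rARM_SP) are tagged with their vreg slot index + // (byte displacement / 4) so later passes can reason about Dalvik register aliasing; + // see AnnotateDalvikRegAccess() in codegen_util.cc.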
if (rBase == rARM_SP) { + AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, is64bit); + } + return load; +} + +LIR* ArmMir2Lir::LoadBaseDisp(int rBase, int displacement, int r_dest, + OpSize size, int s_reg) +{ + return LoadBaseDispBody(rBase, displacement, r_dest, -1, size, s_reg); +} + +LIR* ArmMir2Lir::LoadBaseDispWide(int rBase, int displacement, int r_dest_lo, + int r_dest_hi, int s_reg) +{ + return LoadBaseDispBody(rBase, displacement, r_dest_lo, r_dest_hi, kLong, s_reg); +} + + +LIR* ArmMir2Lir::StoreBaseDispBody(int rBase, int displacement, + int r_src, int r_src_hi, OpSize size) { + LIR* store = NULL; + ArmOpcode opcode = kThumbBkpt; + bool short_form = false; + bool thumb2Form = (displacement < 4092 && displacement >= 0); + bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_src)); + int encoded_disp = displacement; + bool is64bit = false; + bool already_generated = false; + switch (size) { + case kLong: + case kDouble: + is64bit = true; + if (!ARM_FPREG(r_src)) { + if (displacement <= 1020) { + store = NewLIR4(kThumb2StrdI8, r_src, r_src_hi, rBase, displacement >> 2); + } else { + store = StoreBaseDispBody(rBase, displacement, r_src, -1, kWord); + StoreBaseDispBody(rBase, displacement + 4, r_src_hi, -1, kWord); + } + already_generated = true; + } else { + if (ARM_SINGLEREG(r_src)) { + DCHECK(ARM_FPREG(r_src_hi)); + r_src = S2d(r_src, r_src_hi); + } + opcode = kThumb2Vstrd; + if (displacement <= 1020) { + short_form = true; + encoded_disp >>= 2; + } + } + break; + case kSingle: + case kWord: + if (ARM_FPREG(r_src)) { + DCHECK(ARM_SINGLEREG(r_src)); + opcode = kThumb2Vstrs; + if (displacement <= 1020) { + short_form = true; + encoded_disp >>= 2; + } + break; + } + if (ARM_LOWREG(r_src) && (rBase == r13sp) && + (displacement <= 1020) && (displacement >= 0)) { + short_form = true; + encoded_disp >>= 2; + opcode = kThumbStrSpRel; + } else if (all_low_regs && displacement < 128 && displacement >= 0) { + DCHECK_EQ((displacement & 0x3), 0); + short_form = true; + encoded_disp >>= 2; + opcode = kThumbStrRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2StrRRI12; + } + break; + case kUnsignedHalf: + case kSignedHalf: + if (all_low_regs && displacement < 64 && displacement >= 0) { + DCHECK_EQ((displacement & 0x1), 0); + short_form = true; + encoded_disp >>= 1; + opcode = kThumbStrhRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2StrhRRI12; + } + break; + case kUnsignedByte: + case kSignedByte: + if (all_low_regs && displacement < 32 && displacement >= 0) { + short_form = true; + opcode = kThumbStrbRRI5; + } else if (thumb2Form) { + short_form = true; + opcode = kThumb2StrbRRI12; + } + break; + default: + LOG(FATAL) << "Bad size: " << size; + } + if (!already_generated) { + if (short_form) { + store = NewLIR3(opcode, r_src, rBase, encoded_disp); + } else { + int r_scratch = AllocTemp(); + LoadConstant(r_scratch, encoded_disp); + store = StoreBaseIndexed(rBase, r_scratch, r_src, 0, size); + FreeTemp(r_scratch); + } + } + + // TODO: In future, may need to differentiate Dalvik & spill accesses + if (rBase == rARM_SP) { + AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, is64bit); + } + return store; +} + +LIR* ArmMir2Lir::StoreBaseDisp(int rBase, int displacement, int r_src, + OpSize size) +{ + return StoreBaseDispBody(rBase, displacement, r_src, -1, size); +} + +LIR* ArmMir2Lir::StoreBaseDispWide(int rBase, int displacement, + int r_src_lo, int r_src_hi) +{ + return StoreBaseDispBody(rBase, displacement, 
r_src_lo, r_src_hi, kLong); +} + +LIR* ArmMir2Lir::OpFpRegCopy(int r_dest, int r_src) +{ + int opcode; + DCHECK_EQ(ARM_DOUBLEREG(r_dest), ARM_DOUBLEREG(r_src)); + if (ARM_DOUBLEREG(r_dest)) { + opcode = kThumb2Vmovd; + } else { + if (ARM_SINGLEREG(r_dest)) { + opcode = ARM_SINGLEREG(r_src) ? kThumb2Vmovs : kThumb2Fmsr; + } else { + DCHECK(ARM_SINGLEREG(r_src)); + opcode = kThumb2Fmrs; + } + } + LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest, r_src); + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { + res->flags.is_nop = true; + } + return res; +} + +LIR* ArmMir2Lir::OpThreadMem(OpKind op, int thread_offset) +{ + LOG(FATAL) << "Unexpected use of OpThreadMem for Arm"; + return NULL; +} + +LIR* ArmMir2Lir::OpMem(OpKind op, int rBase, int disp) +{ + LOG(FATAL) << "Unexpected use of OpMem for Arm"; + return NULL; +} + +LIR* ArmMir2Lir::StoreBaseIndexedDisp(int rBase, int r_index, int scale, + int displacement, int r_src, int r_src_hi, OpSize size, + int s_reg) +{ + LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm"; + return NULL; +} + +LIR* ArmMir2Lir::OpRegMem(OpKind op, int r_dest, int rBase, int offset) +{ + LOG(FATAL) << "Unexpected use of OpRegMem for Arm"; + return NULL; +} + +LIR* ArmMir2Lir::LoadBaseIndexedDisp(int rBase, int r_index, int scale, + int displacement, int r_dest, int r_dest_hi, OpSize size, + int s_reg) +{ + LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm"; + return NULL; +} + +} // namespace art diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc new file mode 100644 index 0000000000..5c10c4ce2b --- /dev/null +++ b/compiler/dex/quick/codegen_util.cc @@ -0,0 +1,1109 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dex/compiler_internals.h" +#include "dex_file-inl.h" +#include "gc_map.h" +#include "mir_to_lir-inl.h" +#include "verifier/dex_gc_map.h" +#include "verifier/method_verifier.h" + +namespace art { + +bool Mir2Lir::IsInexpensiveConstant(RegLocation rl_src) +{ + bool res = false; + if (rl_src.is_const) { + if (rl_src.wide) { + if (rl_src.fp) { + res = InexpensiveConstantDouble(mir_graph_->ConstantValueWide(rl_src)); + } else { + res = InexpensiveConstantLong(mir_graph_->ConstantValueWide(rl_src)); + } + } else { + if (rl_src.fp) { + res = InexpensiveConstantFloat(mir_graph_->ConstantValue(rl_src)); + } else { + res = InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src)); + } + } + } + return res; +} + +void Mir2Lir::MarkSafepointPC(LIR* inst) +{ + inst->def_mask = ENCODE_ALL; + LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC); + DCHECK_EQ(safepoint_pc->def_mask, ENCODE_ALL); +} + +bool Mir2Lir::FastInstance(uint32_t field_idx, int& field_offset, bool& is_volatile, bool is_put) +{ + return cu_->compiler_driver->ComputeInstanceFieldInfo( + field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, is_volatile, is_put); +} + +/* Convert an instruction to a NOP */ +void Mir2Lir::NopLIR( LIR* lir) +{ + lir->flags.is_nop = true; +} + +void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) +{ + uint64_t *mask_ptr; + uint64_t mask = ENCODE_MEM;; + DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE)); + if (is_load) { + mask_ptr = &lir->use_mask; + } else { + mask_ptr = &lir->def_mask; + } + /* Clear out the memref flags */ + *mask_ptr &= ~mask; + /* ..and then add back the one we need */ + switch (mem_type) { + case kLiteral: + DCHECK(is_load); + *mask_ptr |= ENCODE_LITERAL; + break; + case kDalvikReg: + *mask_ptr |= ENCODE_DALVIK_REG; + break; + case kHeapRef: + *mask_ptr |= ENCODE_HEAP_REF; + break; + case kMustNotAlias: + /* Currently only loads can be marked as kMustNotAlias */ + DCHECK(!(GetTargetInstFlags(lir->opcode) & IS_STORE)); + *mask_ptr |= ENCODE_MUST_NOT_ALIAS; + break; + default: + LOG(FATAL) << "Oat: invalid memref kind - " << mem_type; + } +} + +/* + * Mark load/store instructions that access Dalvik registers through the stack. + */ +void Mir2Lir::AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, + bool is64bit) +{ + SetMemRefType(lir, is_load, kDalvikReg); + + /* + * Store the Dalvik register id in alias_info. Mark the MSB if it is a 64-bit + * access. 
+ */ + lir->alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit); +} + +/* + * Debugging macros + */ +#define DUMP_RESOURCE_MASK(X) + +/* Pretty-print a LIR instruction */ +void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) +{ + int offset = lir->offset; + int dest = lir->operands[0]; + const bool dump_nop = (cu_->enable_debug & (1 << kDebugShowNops)); + + /* Handle pseudo-ops individually, and all regular insns as a group */ + switch (lir->opcode) { + case kPseudoMethodEntry: + LOG(INFO) << "-------- method entry " + << PrettyMethod(cu_->method_idx, *cu_->dex_file); + break; + case kPseudoMethodExit: + LOG(INFO) << "-------- Method_Exit"; + break; + case kPseudoBarrier: + LOG(INFO) << "-------- BARRIER"; + break; + case kPseudoEntryBlock: + LOG(INFO) << "-------- entry offset: 0x" << std::hex << dest; + break; + case kPseudoDalvikByteCodeBoundary: + if (lir->operands[0] == 0) { + lir->operands[0] = reinterpret_cast<uintptr_t>("No instruction string"); + } + LOG(INFO) << "-------- dalvik offset: 0x" << std::hex + << lir->dalvik_offset << " @ " << reinterpret_cast<char*>(lir->operands[0]); + break; + case kPseudoExitBlock: + LOG(INFO) << "-------- exit offset: 0x" << std::hex << dest; + break; + case kPseudoPseudoAlign4: + LOG(INFO) << reinterpret_cast<uintptr_t>(base_addr) + offset << " (0x" << std::hex + << offset << "): .align4"; + break; + case kPseudoEHBlockLabel: + LOG(INFO) << "Exception_Handling:"; + break; + case kPseudoTargetLabel: + case kPseudoNormalBlockLabel: + LOG(INFO) << "L" << reinterpret_cast<void*>(lir) << ":"; + break; + case kPseudoThrowTarget: + LOG(INFO) << "LT" << reinterpret_cast<void*>(lir) << ":"; + break; + case kPseudoIntrinsicRetry: + LOG(INFO) << "IR" << reinterpret_cast<void*>(lir) << ":"; + break; + case kPseudoSuspendTarget: + LOG(INFO) << "LS" << reinterpret_cast<void*>(lir) << ":"; + break; + case kPseudoSafepointPC: + LOG(INFO) << "LsafepointPC_0x" << std::hex << lir->offset << "_" << lir->dalvik_offset << ":"; + break; + case kPseudoExportedPC: + LOG(INFO) << "LexportedPC_0x" << std::hex << lir->offset << "_" << lir->dalvik_offset << ":"; + break; + case kPseudoCaseLabel: + LOG(INFO) << "LC" << reinterpret_cast<void*>(lir) << ": Case target 0x" + << std::hex << lir->operands[0] << "|" << std::dec << + lir->operands[0]; + break; + default: + if (lir->flags.is_nop && !dump_nop) { + break; + } else { + std::string op_name(BuildInsnString(GetTargetInstName(lir->opcode), + lir, base_addr)); + std::string op_operands(BuildInsnString(GetTargetInstFmt(lir->opcode), + lir, base_addr)); + LOG(INFO) << StringPrintf("%05x: %-9s%s%s", + reinterpret_cast<unsigned int>(base_addr + offset), + op_name.c_str(), op_operands.c_str(), + lir->flags.is_nop ? 
"(nop)" : ""); + } + break; + } + + if (lir->use_mask && (!lir->flags.is_nop || dump_nop)) { + DUMP_RESOURCE_MASK(DumpResourceMask((LIR* ) lir, lir->use_mask, "use")); + } + if (lir->def_mask && (!lir->flags.is_nop || dump_nop)) { + DUMP_RESOURCE_MASK(DumpResourceMask((LIR* ) lir, lir->def_mask, "def")); + } +} + +void Mir2Lir::DumpPromotionMap() +{ + int num_regs = cu_->num_dalvik_registers + cu_->num_compiler_temps + 1; + for (int i = 0; i < num_regs; i++) { + PromotionMap v_reg_map = promotion_map_[i]; + std::string buf; + if (v_reg_map.fp_location == kLocPhysReg) { + StringAppendF(&buf, " : s%d", v_reg_map.FpReg & FpRegMask()); + } + + std::string buf3; + if (i < cu_->num_dalvik_registers) { + StringAppendF(&buf3, "%02d", i); + } else if (i == mir_graph_->GetMethodSReg()) { + buf3 = "Method*"; + } else { + StringAppendF(&buf3, "ct%d", i - cu_->num_dalvik_registers); + } + + LOG(INFO) << StringPrintf("V[%s] -> %s%d%s", buf3.c_str(), + v_reg_map.core_location == kLocPhysReg ? + "r" : "SP+", v_reg_map.core_location == kLocPhysReg ? + v_reg_map.core_reg : SRegOffset(i), + buf.c_str()); + } +} + +/* Dump a mapping table */ +void Mir2Lir::DumpMappingTable(const char* table_name, const std::string& descriptor, + const std::string& name, const std::string& signature, + const std::vector<uint32_t>& v) { + if (v.size() > 0) { + std::string line(StringPrintf("\n %s %s%s_%s_table[%zu] = {", table_name, + descriptor.c_str(), name.c_str(), signature.c_str(), v.size())); + std::replace(line.begin(), line.end(), ';', '_'); + LOG(INFO) << line; + for (uint32_t i = 0; i < v.size(); i+=2) { + line = StringPrintf(" {0x%05x, 0x%04x},", v[i], v[i+1]); + LOG(INFO) << line; + } + LOG(INFO) <<" };\n\n"; + } +} + +/* Dump instructions and constant pool contents */ +void Mir2Lir::CodegenDump() +{ + LOG(INFO) << "Dumping LIR insns for " + << PrettyMethod(cu_->method_idx, *cu_->dex_file); + LIR* lir_insn; + int insns_size = cu_->code_item->insns_size_in_code_units_; + + LOG(INFO) << "Regs (excluding ins) : " << cu_->num_regs; + LOG(INFO) << "Ins : " << cu_->num_ins; + LOG(INFO) << "Outs : " << cu_->num_outs; + LOG(INFO) << "CoreSpills : " << num_core_spills_; + LOG(INFO) << "FPSpills : " << num_fp_spills_; + LOG(INFO) << "CompilerTemps : " << cu_->num_compiler_temps; + LOG(INFO) << "Frame size : " << frame_size_; + LOG(INFO) << "code size is " << total_size_ << + " bytes, Dalvik size is " << insns_size * 2; + LOG(INFO) << "expansion factor: " + << static_cast<float>(total_size_) / static_cast<float>(insns_size * 2); + DumpPromotionMap(); + for (lir_insn = first_lir_insn_; lir_insn != NULL; lir_insn = lir_insn->next) { + DumpLIRInsn(lir_insn, 0); + } + for (lir_insn = literal_list_; lir_insn != NULL; lir_insn = lir_insn->next) { + LOG(INFO) << StringPrintf("%x (%04x): .word (%#x)", lir_insn->offset, lir_insn->offset, + lir_insn->operands[0]); + } + + const DexFile::MethodId& method_id = + cu_->dex_file->GetMethodId(cu_->method_idx); + std::string signature(cu_->dex_file->GetMethodSignature(method_id)); + std::string name(cu_->dex_file->GetMethodName(method_id)); + std::string descriptor(cu_->dex_file->GetMethodDeclaringClassDescriptor(method_id)); + + // Dump mapping tables + DumpMappingTable("PC2Dex_MappingTable", descriptor, name, signature, pc2dex_mapping_table_); + DumpMappingTable("Dex2PC_MappingTable", descriptor, name, signature, dex2pc_mapping_table_); +} + +/* + * Search the existing constants in the literal pool for an exact or close match + * within specified delta (greater or equal to 0). 
+ */ +LIR* Mir2Lir::ScanLiteralPool(LIR* data_target, int value, unsigned int delta) +{ + while (data_target) { + if ((static_cast<unsigned>(value - data_target->operands[0])) <= delta) + return data_target; + data_target = data_target->next; + } + return NULL; +} + +/* Search the existing constants in the literal pool for an exact wide match */ +LIR* Mir2Lir::ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi) +{ + bool lo_match = false; + LIR* lo_target = NULL; + while (data_target) { + if (lo_match && (data_target->operands[0] == val_hi)) { + // Record high word in case we need to expand this later. + lo_target->operands[1] = val_hi; + return lo_target; + } + lo_match = false; + if (data_target->operands[0] == val_lo) { + lo_match = true; + lo_target = data_target; + } + data_target = data_target->next; + } + return NULL; +} + +/* + * The following are building blocks to insert constants into the pool or + * instruction streams. + */ + +/* Add a 32-bit constant to the constant pool */ +LIR* Mir2Lir::AddWordData(LIR* *constant_list_p, int value) +{ + /* Add the constant to the literal pool */ + if (constant_list_p) { + LIR* new_value = static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocData)); + new_value->operands[0] = value; + new_value->next = *constant_list_p; + *constant_list_p = new_value; + return new_value; + } + return NULL; +} + +/* Add a 64-bit constant to the constant pool or mixed with code */ +LIR* Mir2Lir::AddWideData(LIR* *constant_list_p, int val_lo, int val_hi) +{ + AddWordData(constant_list_p, val_hi); + return AddWordData(constant_list_p, val_lo); +} + +static void PushWord(std::vector<uint8_t>&buf, int data) { + buf.push_back( data & 0xff); + buf.push_back( (data >> 8) & 0xff); + buf.push_back( (data >> 16) & 0xff); + buf.push_back( (data >> 24) & 0xff); +} + +static void AlignBuffer(std::vector<uint8_t>&buf, size_t offset) { + while (buf.size() < offset) { + buf.push_back(0); + } +} + +/* Write the literal pool to the output stream */ +void Mir2Lir::InstallLiteralPools() +{ + AlignBuffer(code_buffer_, data_offset_); + LIR* data_lir = literal_list_; + while (data_lir != NULL) { + PushWord(code_buffer_, data_lir->operands[0]); + data_lir = NEXT_LIR(data_lir); + } + // Push code and method literals, record offsets for the compiler to patch. 
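+ // For each entry a patch site is registered with the compiler driver and a placeholder + // word (the address of the target's MethodId) is emitted where the final code/method + // address will later be patched in; keying the placeholder on the MethodId keeps + // otherwise-identical methods deduplicating.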
+ data_lir = code_literal_list_; + while (data_lir != NULL) { + uint32_t target = data_lir->operands[0]; + cu_->compiler_driver->AddCodePatch(cu_->dex_file, + cu_->method_idx, + cu_->invoke_type, + target, + static_cast<InvokeType>(data_lir->operands[1]), + code_buffer_.size()); + const DexFile::MethodId& id = cu_->dex_file->GetMethodId(target); + // unique based on target to ensure code deduplication works + uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id); + PushWord(code_buffer_, unique_patch_value); + data_lir = NEXT_LIR(data_lir); + } + data_lir = method_literal_list_; + while (data_lir != NULL) { + uint32_t target = data_lir->operands[0]; + cu_->compiler_driver->AddMethodPatch(cu_->dex_file, + cu_->method_idx, + cu_->invoke_type, + target, + static_cast<InvokeType>(data_lir->operands[1]), + code_buffer_.size()); + const DexFile::MethodId& id = cu_->dex_file->GetMethodId(target); + // unique based on target to ensure code deduplication works + uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id); + PushWord(code_buffer_, unique_patch_value); + data_lir = NEXT_LIR(data_lir); + } +} + +/* Write the switch tables to the output stream */ +void Mir2Lir::InstallSwitchTables() +{ + GrowableArray<SwitchTable*>::Iterator iterator(&switch_tables_); + while (true) { + Mir2Lir::SwitchTable* tab_rec = iterator.Next(); + if (tab_rec == NULL) break; + AlignBuffer(code_buffer_, tab_rec->offset); + /* + * For Arm, our reference point is the address of the bx + * instruction that does the launch, so we have to subtract + * the auto pc-advance. For other targets the reference point + * is a label, so we can use the offset as-is. + */ + int bx_offset = INVALID_OFFSET; + switch (cu_->instruction_set) { + case kThumb2: + bx_offset = tab_rec->anchor->offset + 4; + break; + case kX86: + bx_offset = 0; + break; + case kMips: + bx_offset = tab_rec->anchor->offset; + break; + default: LOG(FATAL) << "Unexpected instruction set: " << cu_->instruction_set; + } + if (cu_->verbose) { + LOG(INFO) << "Switch table for offset 0x" << std::hex << bx_offset; + } + if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { + const int* keys = reinterpret_cast<const int*>(&(tab_rec->table[2])); + for (int elems = 0; elems < tab_rec->table[1]; elems++) { + int disp = tab_rec->targets[elems]->offset - bx_offset; + if (cu_->verbose) { + LOG(INFO) << " Case[" << elems << "] key: 0x" + << std::hex << keys[elems] << ", disp: 0x" + << std::hex << disp; + } + PushWord(code_buffer_, keys[elems]); + PushWord(code_buffer_, + tab_rec->targets[elems]->offset - bx_offset); + } + } else { + DCHECK_EQ(static_cast<int>(tab_rec->table[0]), + static_cast<int>(Instruction::kPackedSwitchSignature)); + for (int elems = 0; elems < tab_rec->table[1]; elems++) { + int disp = tab_rec->targets[elems]->offset - bx_offset; + if (cu_->verbose) { + LOG(INFO) << " Case[" << elems << "] disp: 0x" + << std::hex << disp; + } + PushWord(code_buffer_, tab_rec->targets[elems]->offset - bx_offset); + } + } + } +} + +/* Write the fill array dta to the output stream */ +void Mir2Lir::InstallFillArrayData() +{ + GrowableArray<FillArrayData*>::Iterator iterator(&fill_array_data_); + while (true) { + Mir2Lir::FillArrayData *tab_rec = iterator.Next(); + if (tab_rec == NULL) break; + AlignBuffer(code_buffer_, tab_rec->offset); + for (int i = 0; i < (tab_rec->size + 1) / 2; i++) { + code_buffer_.push_back( tab_rec->table[i] & 0xFF); + code_buffer_.push_back( (tab_rec->table[i] >> 8) & 0xFF); + } + } +} + +static int 
AssignLiteralOffsetCommon(LIR* lir, int offset) +{ + for (;lir != NULL; lir = lir->next) { + lir->offset = offset; + offset += 4; + } + return offset; +} + +// Make sure we have a code address for every declared catch entry +bool Mir2Lir::VerifyCatchEntries() +{ + bool success = true; + for (std::set<uint32_t>::const_iterator it = mir_graph_->catches_.begin(); + it != mir_graph_->catches_.end(); ++it) { + uint32_t dex_pc = *it; + bool found = false; + for (size_t i = 0; i < dex2pc_mapping_table_.size(); i += 2) { + if (dex_pc == dex2pc_mapping_table_[i+1]) { + found = true; + break; + } + } + if (!found) { + LOG(INFO) << "Missing native PC for catch entry @ 0x" << std::hex << dex_pc; + success = false; + } + } + // Now, try in the other direction + for (size_t i = 0; i < dex2pc_mapping_table_.size(); i += 2) { + uint32_t dex_pc = dex2pc_mapping_table_[i+1]; + if (mir_graph_->catches_.find(dex_pc) == mir_graph_->catches_.end()) { + LOG(INFO) << "Unexpected catch entry @ dex pc 0x" << std::hex << dex_pc; + success = false; + } + } + if (!success) { + LOG(INFO) << "Bad dex2pcMapping table in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); + LOG(INFO) << "Entries @ decode: " << mir_graph_->catches_.size() << ", Entries in table: " + << dex2pc_mapping_table_.size()/2; + } + return success; +} + + +void Mir2Lir::CreateMappingTables() +{ + for (LIR* tgt_lir = first_lir_insn_; tgt_lir != NULL; tgt_lir = NEXT_LIR(tgt_lir)) { + if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoSafepointPC)) { + pc2dex_mapping_table_.push_back(tgt_lir->offset); + pc2dex_mapping_table_.push_back(tgt_lir->dalvik_offset); + } + if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoExportedPC)) { + dex2pc_mapping_table_.push_back(tgt_lir->offset); + dex2pc_mapping_table_.push_back(tgt_lir->dalvik_offset); + } + } + if (kIsDebugBuild) { + DCHECK(VerifyCatchEntries()); + } + combined_mapping_table_.push_back(pc2dex_mapping_table_.size() + + dex2pc_mapping_table_.size()); + combined_mapping_table_.push_back(pc2dex_mapping_table_.size()); + combined_mapping_table_.insert(combined_mapping_table_.end(), pc2dex_mapping_table_.begin(), + pc2dex_mapping_table_.end()); + combined_mapping_table_.insert(combined_mapping_table_.end(), dex2pc_mapping_table_.begin(), + dex2pc_mapping_table_.end()); +} + +class NativePcToReferenceMapBuilder { + public: + NativePcToReferenceMapBuilder(std::vector<uint8_t>* table, + size_t entries, uint32_t max_native_offset, + size_t references_width) : entries_(entries), + references_width_(references_width), in_use_(entries), + table_(table) { + // Compute width in bytes needed to hold max_native_offset. + native_offset_width_ = 0; + while (max_native_offset != 0) { + native_offset_width_++; + max_native_offset >>= 8; + } + // Resize table and set up header. 
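+ // Header layout (4 bytes, little-endian): bits [2:0] hold the width in bytes of a + // native offset, bits [15:3] the width in bytes of a reference bitmap, and bits + // [31:16] the entry count. Example: native_offset_width_ = 2, references_width_ = 1, + // entries = 10 yields header bytes {0x0a, 0x00, 0x0a, 0x00}.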
+ table->resize((EntryWidth() * entries) + sizeof(uint32_t)); + CHECK_LT(native_offset_width_, 1U << 3); + (*table)[0] = native_offset_width_ & 7; + CHECK_LT(references_width_, 1U << 13); + (*table)[0] |= (references_width_ << 3) & 0xFF; + (*table)[1] = (references_width_ >> 5) & 0xFF; + CHECK_LT(entries, 1U << 16); + (*table)[2] = entries & 0xFF; + (*table)[3] = (entries >> 8) & 0xFF; + } + + void AddEntry(uint32_t native_offset, const uint8_t* references) { + size_t table_index = TableIndex(native_offset); + while (in_use_[table_index]) { + table_index = (table_index + 1) % entries_; + } + in_use_[table_index] = true; + SetNativeOffset(table_index, native_offset); + DCHECK_EQ(native_offset, GetNativeOffset(table_index)); + SetReferences(table_index, references); + } + + private: + size_t TableIndex(uint32_t native_offset) { + return NativePcOffsetToReferenceMap::Hash(native_offset) % entries_; + } + + uint32_t GetNativeOffset(size_t table_index) { + uint32_t native_offset = 0; + size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); + for (size_t i = 0; i < native_offset_width_; i++) { + native_offset |= (*table_)[table_offset + i] << (i * 8); + } + return native_offset; + } + + void SetNativeOffset(size_t table_index, uint32_t native_offset) { + size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); + for (size_t i = 0; i < native_offset_width_; i++) { + (*table_)[table_offset + i] = (native_offset >> (i * 8)) & 0xFF; + } + } + + void SetReferences(size_t table_index, const uint8_t* references) { + size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); + memcpy(&(*table_)[table_offset + native_offset_width_], references, references_width_); + } + + size_t EntryWidth() const { + return native_offset_width_ + references_width_; + } + + // Number of entries in the table. + const size_t entries_; + // Number of bytes used to encode the reference bitmap. + const size_t references_width_; + // Number of bytes used to encode a native offset. + size_t native_offset_width_; + // Entries that are in use. + std::vector<bool> in_use_; + // The table we're building. + std::vector<uint8_t>* const table_; +}; + +void Mir2Lir::CreateNativeGcMap() { + const std::vector<uint32_t>& mapping_table = pc2dex_mapping_table_; + uint32_t max_native_offset = 0; + for (size_t i = 0; i < mapping_table.size(); i += 2) { + uint32_t native_offset = mapping_table[i + 0]; + if (native_offset > max_native_offset) { + max_native_offset = native_offset; + } + } + MethodReference method_ref(cu_->dex_file, cu_->method_idx); + const std::vector<uint8_t>* gc_map_raw = verifier::MethodVerifier::GetDexGcMap(method_ref); + verifier::DexPcToReferenceMap dex_gc_map(&(*gc_map_raw)[4], gc_map_raw->size() - 4); + // Compute native offset to references size. 
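+ // The builder below hashes each native pc into a fixed-size open-addressed table + // (linear probing on collisions), storing the encoded offset next to the verifier's + // reference bitmap for the corresponding dex pc.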
+ NativePcToReferenceMapBuilder native_gc_map_builder(&native_gc_map_, + mapping_table.size() / 2, max_native_offset, + dex_gc_map.RegWidth()); + + for (size_t i = 0; i < mapping_table.size(); i += 2) { + uint32_t native_offset = mapping_table[i + 0]; + uint32_t dex_pc = mapping_table[i + 1]; + const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false); + CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc; + native_gc_map_builder.AddEntry(native_offset, references); + } +} + +/* Determine the offset of each literal field */ +int Mir2Lir::AssignLiteralOffset(int offset) +{ + offset = AssignLiteralOffsetCommon(literal_list_, offset); + offset = AssignLiteralOffsetCommon(code_literal_list_, offset); + offset = AssignLiteralOffsetCommon(method_literal_list_, offset); + return offset; +} + +int Mir2Lir::AssignSwitchTablesOffset(int offset) +{ + GrowableArray<SwitchTable*>::Iterator iterator(&switch_tables_); + while (true) { + Mir2Lir::SwitchTable *tab_rec = iterator.Next(); + if (tab_rec == NULL) break; + tab_rec->offset = offset; + if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { + offset += tab_rec->table[1] * (sizeof(int) * 2); + } else { + DCHECK_EQ(static_cast<int>(tab_rec->table[0]), + static_cast<int>(Instruction::kPackedSwitchSignature)); + offset += tab_rec->table[1] * sizeof(int); + } + } + return offset; +} + +int Mir2Lir::AssignFillArrayDataOffset(int offset) +{ + GrowableArray<FillArrayData*>::Iterator iterator(&fill_array_data_); + while (true) { + Mir2Lir::FillArrayData *tab_rec = iterator.Next(); + if (tab_rec == NULL) break; + tab_rec->offset = offset; + offset += tab_rec->size; + // word align + offset = (offset + 3) & ~3; + } + return offset; +} + +// LIR offset assignment. +int Mir2Lir::AssignInsnOffsets() +{ + LIR* lir; + int offset = 0; + + for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + lir->offset = offset; + if (lir->opcode >= 0) { + if (!lir->flags.is_nop) { + offset += lir->flags.size; + } + } else if (lir->opcode == kPseudoPseudoAlign4) { + if (offset & 0x2) { + offset += 2; + lir->operands[0] = 1; + } else { + lir->operands[0] = 0; + } + } + /* Pseudo opcodes don't consume space */ + } + + return offset; +} + +/* + * Walk the compilation unit and assign offsets to instructions + * and literals and compute the total size of the compiled unit. + */ +void Mir2Lir::AssignOffsets() +{ + int offset = AssignInsnOffsets(); + + /* Const values have to be word aligned */ + offset = (offset + 3) & ~3; + + /* Set up offsets for literals */ + data_offset_ = offset; + + offset = AssignLiteralOffset(offset); + + offset = AssignSwitchTablesOffset(offset); + + offset = AssignFillArrayDataOffset(offset); + + total_size_ = offset; +} + +/* + * Go over each instruction in the list and calculate the offset from the top + * before sending them off to the assembler. If an out-of-range branch distance is + * seen, rearrange the instructions a bit to correct it. + */ +void Mir2Lir::AssembleLIR() +{ + AssignOffsets(); + int assembler_retries = 0; + /* + * Assemble here. Note that we generate code with optimistic assumptions + * and if found not to work, we'll have to redo the sequence and retry.
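
// -- [editor's illustrative sketch; not part of this patch] -------------------
// AssignOffsets() and AssignFillArrayDataOffset() above both round offsets up
// to a 4-byte boundary with the "(offset + 3) & ~3" idiom.  The same trick for
// any power-of-two alignment (illustrative helper, not from this patch):
static int RoundUpToAlignment(int offset, int alignment) {
  // alignment must be a power of two; e.g. RoundUpToAlignment(13, 4) == 16.
  return (offset + alignment - 1) & ~(alignment - 1);
}
// -----------------------------------------------------------------------------
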
+ */ + + while (true) { + AssemblerStatus res = AssembleInstructions(0); + if (res == kSuccess) { + break; + } else { + assembler_retries++; + if (assembler_retries > MAX_ASSEMBLER_RETRIES) { + CodegenDump(); + LOG(FATAL) << "Assembler error - too many retries"; + } + // Redo offsets and try again + AssignOffsets(); + code_buffer_.clear(); + } + } + + // Install literals + InstallLiteralPools(); + + // Install switch tables + InstallSwitchTables(); + + // Install fill array data + InstallFillArrayData(); + + // Create the mapping table and native offset to reference map. + CreateMappingTables(); + + CreateNativeGcMap(); +} + +/* + * Insert a kPseudoCaseLabel at the beginning of the Dalvik + * offset vaddr. This label will be used to fix up the case + * branch table during the assembly phase. Be sure to set + * all resource flags on this to prevent code motion across + * target boundaries. KeyVal is just there for debugging. + */ +LIR* Mir2Lir::InsertCaseLabel(int vaddr, int keyVal) +{ + SafeMap<unsigned int, LIR*>::iterator it; + it = boundary_map_.find(vaddr); + if (it == boundary_map_.end()) { + LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr; + } + LIR* new_label = static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocLIR)); + new_label->dalvik_offset = vaddr; + new_label->opcode = kPseudoCaseLabel; + new_label->operands[0] = keyVal; + InsertLIRAfter(it->second, new_label); + return new_label; +} + +void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec) +{ + const uint16_t* table = tab_rec->table; + int base_vaddr = tab_rec->vaddr; + const int *targets = reinterpret_cast<const int*>(&table[4]); + int entries = table[1]; + int low_key = s4FromSwitchData(&table[2]); + for (int i = 0; i < entries; i++) { + tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], i + low_key); + } +} + +void Mir2Lir::MarkSparseCaseLabels(Mir2Lir::SwitchTable *tab_rec) +{ + const uint16_t* table = tab_rec->table; + int base_vaddr = tab_rec->vaddr; + int entries = table[1]; + const int* keys = reinterpret_cast<const int*>(&table[2]); + const int* targets = &keys[entries]; + for (int i = 0; i < entries; i++) { + tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], keys[i]); + } +} + +void Mir2Lir::ProcessSwitchTables() +{ + GrowableArray<SwitchTable*>::Iterator iterator(&switch_tables_); + while (true) { + Mir2Lir::SwitchTable *tab_rec = iterator.Next(); + if (tab_rec == NULL) break; + if (tab_rec->table[0] == Instruction::kPackedSwitchSignature) { + MarkPackedCaseLabels(tab_rec); + } else if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { + MarkSparseCaseLabels(tab_rec); + } else { + LOG(FATAL) << "Invalid switch table"; + } + } +} + +void Mir2Lir::DumpSparseSwitchTable(const uint16_t* table) + /* + * Sparse switch data format: + * ushort ident = 0x0200 magic value + * ushort size number of entries in the table; > 0 + * int keys[size] keys, sorted low-to-high; 32-bit aligned + * int targets[size] branch targets, relative to switch opcode + * + * Total size is (2+size*4) 16-bit code units. 
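
// -- [editor's illustrative sketch; not part of this patch] -------------------
// Given the sparse-switch payload layout documented just above (ident, size,
// sorted keys[], then targets[]), a case value can be resolved with a binary
// search over the keys.  This helper only illustrates the data layout; it is
// not how the backends actually lower the switch.
#include <algorithm>
#include <cstdint>

static bool FindSparseSwitchTarget(const uint16_t* table, int32_t key, int32_t* target) {
  int size = table[1];                                           // number of entries
  const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]);
  const int32_t* targets = &keys[size];
  const int32_t* it = std::lower_bound(keys, keys + size, key);  // keys are sorted low-to-high
  if (it == keys + size || *it != key) {
    return false;                      // no case matches; execution falls through
  }
  *target = targets[it - keys];        // branch target, relative to the switch opcode
  return true;
}
// -----------------------------------------------------------------------------
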
+ */ +{ + uint16_t ident = table[0]; + int entries = table[1]; + const int* keys = reinterpret_cast<const int*>(&table[2]); + const int* targets = &keys[entries]; + LOG(INFO) << "Sparse switch table - ident:0x" << std::hex << ident + << ", entries: " << std::dec << entries; + for (int i = 0; i < entries; i++) { + LOG(INFO) << " Key[" << keys[i] << "] -> 0x" << std::hex << targets[i]; + } +} + +void Mir2Lir::DumpPackedSwitchTable(const uint16_t* table) + /* + * Packed switch data format: + * ushort ident = 0x0100 magic value + * ushort size number of entries in the table + * int first_key first (and lowest) switch case value + * int targets[size] branch targets, relative to switch opcode + * + * Total size is (4+size*2) 16-bit code units. + */ +{ + uint16_t ident = table[0]; + const int* targets = reinterpret_cast<const int*>(&table[4]); + int entries = table[1]; + int low_key = s4FromSwitchData(&table[2]); + LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident + << ", entries: " << std::dec << entries << ", low_key: " << low_key; + for (int i = 0; i < entries; i++) { + LOG(INFO) << " Key[" << (i + low_key) << "] -> 0x" << std::hex + << targets[i]; + } +} + +/* + * Set up special LIR to mark a Dalvik byte-code instruction start and + * record it in the boundary_map. NOTE: in cases such as kMirOpCheck in + * which we split a single Dalvik instruction, only the first MIR op + * associated with a Dalvik PC should be entered into the map. + */ +LIR* Mir2Lir::MarkBoundary(int offset, const char* inst_str) +{ + LIR* res = NewLIR1(kPseudoDalvikByteCodeBoundary, reinterpret_cast<uintptr_t>(inst_str)); + if (boundary_map_.find(offset) == boundary_map_.end()) { + boundary_map_.Put(offset, res); + } + return res; +} + +bool Mir2Lir::EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) +{ + bool is_taken; + switch (opcode) { + case Instruction::IF_EQ: is_taken = (src1 == src2); break; + case Instruction::IF_NE: is_taken = (src1 != src2); break; + case Instruction::IF_LT: is_taken = (src1 < src2); break; + case Instruction::IF_GE: is_taken = (src1 >= src2); break; + case Instruction::IF_GT: is_taken = (src1 > src2); break; + case Instruction::IF_LE: is_taken = (src1 <= src2); break; + case Instruction::IF_EQZ: is_taken = (src1 == 0); break; + case Instruction::IF_NEZ: is_taken = (src1 != 0); break; + case Instruction::IF_LTZ: is_taken = (src1 < 0); break; + case Instruction::IF_GEZ: is_taken = (src1 >= 0); break; + case Instruction::IF_GTZ: is_taken = (src1 > 0); break; + case Instruction::IF_LEZ: is_taken = (src1 <= 0); break; + default: + LOG(FATAL) << "Unexpected opcode " << opcode; + is_taken = false; + } + return is_taken; +} + +// Convert relation of src1/src2 to src2/src1 +ConditionCode Mir2Lir::FlipComparisonOrder(ConditionCode before) { + ConditionCode res; + switch (before) { + case kCondEq: res = kCondEq; break; + case kCondNe: res = kCondNe; break; + case kCondLt: res = kCondGt; break; + case kCondGt: res = kCondLt; break; + case kCondLe: res = kCondGe; break; + case kCondGe: res = kCondLe; break; + default: + res = static_cast<ConditionCode>(0); + LOG(FATAL) << "Unexpected ccode " << before; + } + return res; +} + +// TODO: move to mir_to_lir.cc +Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) + : Backend(arena), + literal_list_(NULL), + method_literal_list_(NULL), + code_literal_list_(NULL), + cu_(cu), + mir_graph_(mir_graph), + switch_tables_(arena, 4, kGrowableArraySwitchTables), + fill_array_data_(arena, 4, 
kGrowableArrayFillArrayData), + throw_launchpads_(arena, 2048, kGrowableArrayThrowLaunchPads), + suspend_launchpads_(arena, 4, kGrowableArraySuspendLaunchPads), + intrinsic_launchpads_(arena, 2048, kGrowableArrayMisc), + data_offset_(0), + total_size_(0), + block_label_list_(NULL), + current_dalvik_offset_(0), + reg_pool_(NULL), + live_sreg_(0), + num_core_spills_(0), + num_fp_spills_(0), + frame_size_(0), + core_spill_mask_(0), + fp_spill_mask_(0), + first_lir_insn_(NULL), + last_lir_insn_(NULL) + { + promotion_map_ = static_cast<PromotionMap*> + (arena_->NewMem((cu_->num_dalvik_registers + cu_->num_compiler_temps + 1) * + sizeof(promotion_map_[0]), true, ArenaAllocator::kAllocRegAlloc)); +} + +void Mir2Lir::Materialize() { + CompilerInitializeRegAlloc(); // Needs to happen after SSA naming + + /* Allocate Registers using simple local allocation scheme */ + SimpleRegAlloc(); + + //FIXME: re-enable by retrieving from mir_graph + SpecialCaseHandler special_case = kNoHandler; + + if (special_case != kNoHandler) { + /* + * Custom codegen for special cases. If for any reason the + * special codegen doesn't succeed, first_lir_insn_ will + * set to NULL; + */ + SpecialMIR2LIR(special_case); + } + + /* Convert MIR to LIR, etc. */ + if (first_lir_insn_ == NULL) { + MethodMIR2LIR(); + } + + /* Method is not empty */ + if (first_lir_insn_) { + + // mark the targets of switch statement case labels + ProcessSwitchTables(); + + /* Convert LIR into machine code. */ + AssembleLIR(); + + if (cu_->verbose) { + CodegenDump(); + } + + } + +} + +CompiledMethod* Mir2Lir::GetCompiledMethod() { + // Combine vmap tables - core regs, then fp regs - into vmap_table + std::vector<uint16_t> vmap_table; + // Core regs may have been inserted out of order - sort first + std::sort(core_vmap_table_.begin(), core_vmap_table_.end()); + for (size_t i = 0 ; i < core_vmap_table_.size(); i++) { + // Copy, stripping out the phys register sort key + vmap_table.push_back(~(-1 << VREG_NUM_WIDTH) & core_vmap_table_[i]); + } + // If we have a frame, push a marker to take place of lr + if (frame_size_ > 0) { + vmap_table.push_back(INVALID_VREG); + } else { + DCHECK_EQ(__builtin_popcount(core_spill_mask_), 0); + DCHECK_EQ(__builtin_popcount(fp_spill_mask_), 0); + } + // Combine vmap tables - core regs, then fp regs. fp regs already sorted + for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) { + vmap_table.push_back(fp_vmap_table_[i]); + } + CompiledMethod* result = + new CompiledMethod(cu_->instruction_set, code_buffer_, + frame_size_, core_spill_mask_, fp_spill_mask_, + combined_mapping_table_, vmap_table, native_gc_map_); + return result; +} + +int Mir2Lir::ComputeFrameSize() { + /* Figure out the frame size */ + static const uint32_t kAlignMask = kStackAlignment - 1; + uint32_t size = (num_core_spills_ + num_fp_spills_ + + 1 /* filler word */ + cu_->num_regs + cu_->num_outs + + cu_->num_compiler_temps + 1 /* cur_method* */) + * sizeof(uint32_t); + /* Align and set */ + return (size + kAlignMask) & ~(kAlignMask); +} + +/* + * Append an LIR instruction to the LIR list maintained by a compilation + * unit + */ +void Mir2Lir::AppendLIR(LIR* lir) +{ + if (first_lir_insn_ == NULL) { + DCHECK(last_lir_insn_ == NULL); + last_lir_insn_ = first_lir_insn_ = lir; + lir->prev = lir->next = NULL; + } else { + last_lir_insn_->next = lir; + lir->prev = last_lir_insn_; + lir->next = NULL; + last_lir_insn_ = lir; + } +} + +/* + * Insert an LIR instruction before the current instruction, which cannot be the + * first instruction. 
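
// -- [editor's illustrative sketch; not part of this patch] -------------------
// GetCompiledMethod() above keeps only the low VREG_NUM_WIDTH bits of each
// core vmap entry (dropping the physical-register sort key), and
// ComputeFrameSize() rounds the raw frame size up to kStackAlignment.  The two
// bit manipulations, spelled out with the widths passed as parameters (the
// real constants are defined elsewhere in ART):
#include <cstdint>

static uint16_t StripVmapSortKey(uint32_t vmap_entry, unsigned vreg_num_width) {
  uint32_t mask = ~(~0u << vreg_num_width);       // low vreg_num_width bits set
  return static_cast<uint16_t>(vmap_entry & mask);
}

static uint32_t AlignFrameSize(uint32_t raw_size, uint32_t stack_alignment) {
  uint32_t align_mask = stack_alignment - 1;      // stack_alignment is a power of two
  return (raw_size + align_mask) & ~align_mask;
}
// -----------------------------------------------------------------------------
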
+ * + * prev_lir <-> new_lir <-> current_lir + */ +void Mir2Lir::InsertLIRBefore(LIR* current_lir, LIR* new_lir) +{ + DCHECK(current_lir->prev != NULL); + LIR *prev_lir = current_lir->prev; + + prev_lir->next = new_lir; + new_lir->prev = prev_lir; + new_lir->next = current_lir; + current_lir->prev = new_lir; +} + +/* + * Insert an LIR instruction after the current instruction, which cannot be the + * first instruction. + * + * current_lir -> new_lir -> old_next + */ +void Mir2Lir::InsertLIRAfter(LIR* current_lir, LIR* new_lir) +{ + new_lir->prev = current_lir; + new_lir->next = current_lir->next; + current_lir->next = new_lir; + new_lir->next->prev = new_lir; +} + + +} // namespace art diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc new file mode 100644 index 0000000000..865b9c5c4d --- /dev/null +++ b/compiler/dex/quick/gen_common.cc @@ -0,0 +1,1800 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex/compiler_ir.h" +#include "dex/compiler_internals.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "mirror/array.h" +#include "oat/runtime/oat_support_entrypoints.h" +#include "verifier/method_verifier.h" + +namespace art { + +/* + * This source files contains "gen" codegen routines that should + * be applicable to most targets. Only mid-level support utilities + * and "op" calls may be used here. + */ + +/* + * Generate an kPseudoBarrier marker to indicate the boundary of special + * blocks. + */ +void Mir2Lir::GenBarrier() +{ + LIR* barrier = NewLIR0(kPseudoBarrier); + /* Mark all resources as being clobbered */ + barrier->def_mask = -1; +} + +// FIXME: need to do some work to split out targets with +// condition codes and those without +LIR* Mir2Lir::GenCheck(ConditionCode c_code, ThrowKind kind) +{ + DCHECK_NE(cu_->instruction_set, kMips); + LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, current_dalvik_offset_); + LIR* branch = OpCondBranch(c_code, tgt); + // Remember branch target - will process later + throw_launchpads_.Insert(tgt); + return branch; +} + +LIR* Mir2Lir::GenImmedCheck(ConditionCode c_code, int reg, int imm_val, ThrowKind kind) +{ + LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, current_dalvik_offset_, reg, imm_val); + LIR* branch; + if (c_code == kCondAl) { + branch = OpUnconditionalBranch(tgt); + } else { + branch = OpCmpImmBranch(c_code, reg, imm_val, tgt); + } + // Remember branch target - will process later + throw_launchpads_.Insert(tgt); + return branch; +} + +/* Perform null-check on a register. 
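
// -- [editor's illustrative sketch; not part of this patch] -------------------
// GenCheck()/GenImmedCheck() above only emit a conditional branch to a
// placeholder throw target and remember that target in throw_launchpads_; the
// out-of-line throw sequences are materialized later by HandleThrowLaunchPads()
// further down in this file.  A stripped-down model of that deferral, with
// invented names:
#include <functional>
#include <utility>
#include <vector>

class DeferredSlowPaths {
 public:
  // Record a slow path now; emit its code after all fast paths are laid out.
  void Record(std::function<void()> emit_slow_path) {
    pads_.push_back(std::move(emit_slow_path));
  }
  void EmitAll() {
    for (std::function<void()>& emit : pads_) {
      emit();
    }
    pads_.clear();
  }
 private:
  std::vector<std::function<void()>> pads_;
};
// -----------------------------------------------------------------------------
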
*/ +LIR* Mir2Lir::GenNullCheck(int s_reg, int m_reg, int opt_flags) +{ + if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && + opt_flags & MIR_IGNORE_NULL_CHECK) { + return NULL; + } + return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer); +} + +/* Perform check on two registers */ +LIR* Mir2Lir::GenRegRegCheck(ConditionCode c_code, int reg1, int reg2, + ThrowKind kind) +{ + LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, current_dalvik_offset_, reg1, reg2); + LIR* branch = OpCmpBranch(c_code, reg1, reg2, tgt); + // Remember branch target - will process later + throw_launchpads_.Insert(tgt); + return branch; +} + +void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, + RegLocation rl_src2, LIR* taken, + LIR* fall_through) +{ + ConditionCode cond; + switch (opcode) { + case Instruction::IF_EQ: + cond = kCondEq; + break; + case Instruction::IF_NE: + cond = kCondNe; + break; + case Instruction::IF_LT: + cond = kCondLt; + break; + case Instruction::IF_GE: + cond = kCondGe; + break; + case Instruction::IF_GT: + cond = kCondGt; + break; + case Instruction::IF_LE: + cond = kCondLe; + break; + default: + cond = static_cast<ConditionCode>(0); + LOG(FATAL) << "Unexpected opcode " << opcode; + } + + // Normalize such that if either operand is constant, src2 will be constant + if (rl_src1.is_const) { + RegLocation rl_temp = rl_src1; + rl_src1 = rl_src2; + rl_src2 = rl_temp; + cond = FlipComparisonOrder(cond); + } + + rl_src1 = LoadValue(rl_src1, kCoreReg); + // Is this really an immediate comparison? + if (rl_src2.is_const) { + // If it's already live in a register or not easily materialized, just keep going + RegLocation rl_temp = UpdateLoc(rl_src2); + if ((rl_temp.location == kLocDalvikFrame) && + InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src2))) { + // OK - convert this to a compare immediate and branch + OpCmpImmBranch(cond, rl_src1.low_reg, mir_graph_->ConstantValue(rl_src2), taken); + OpUnconditionalBranch(fall_through); + return; + } + } + rl_src2 = LoadValue(rl_src2, kCoreReg); + OpCmpBranch(cond, rl_src1.low_reg, rl_src2.low_reg, taken); + OpUnconditionalBranch(fall_through); +} + +void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken, + LIR* fall_through) +{ + ConditionCode cond; + rl_src = LoadValue(rl_src, kCoreReg); + switch (opcode) { + case Instruction::IF_EQZ: + cond = kCondEq; + break; + case Instruction::IF_NEZ: + cond = kCondNe; + break; + case Instruction::IF_LTZ: + cond = kCondLt; + break; + case Instruction::IF_GEZ: + cond = kCondGe; + break; + case Instruction::IF_GTZ: + cond = kCondGt; + break; + case Instruction::IF_LEZ: + cond = kCondLe; + break; + default: + cond = static_cast<ConditionCode>(0); + LOG(FATAL) << "Unexpected opcode " << opcode; + } + OpCmpImmBranch(cond, rl_src.low_reg, 0, taken); + OpUnconditionalBranch(fall_through); +} + +void Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) +{ + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (rl_src.location == kLocPhysReg) { + OpRegCopy(rl_result.low_reg, rl_src.low_reg); + } else { + LoadValueDirect(rl_src, rl_result.low_reg); + } + OpRegRegImm(kOpAsr, rl_result.high_reg, rl_result.low_reg, 31); + StoreValueWide(rl_dest, rl_result); +} + +void Mir2Lir::GenIntNarrowing(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src) +{ + rl_src = LoadValue(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpKind op = kOpInvalid; + switch (opcode) { + case 
Instruction::INT_TO_BYTE: + op = kOp2Byte; + break; + case Instruction::INT_TO_SHORT: + op = kOp2Short; + break; + case Instruction::INT_TO_CHAR: + op = kOp2Char; + break; + default: + LOG(ERROR) << "Bad int conversion type"; + } + OpRegReg(op, rl_result.low_reg, rl_src.low_reg); + StoreValue(rl_dest, rl_result); +} + +/* + * Let helper function take care of everything. Will call + * Array::AllocFromCode(type_idx, method, count); + * Note: AllocFromCode will handle checks for errNegativeArraySize. + */ +void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest, + RegLocation rl_src) +{ + FlushAllRegs(); /* Everything to home location */ + int func_offset; + if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, + type_idx)) { + func_offset = ENTRYPOINT_OFFSET(pAllocArrayFromCode); + } else { + func_offset= ENTRYPOINT_OFFSET(pAllocArrayFromCodeWithAccessCheck); + } + CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true); + RegLocation rl_result = GetReturn(false); + StoreValue(rl_dest, rl_result); +} + +/* + * Similar to GenNewArray, but with post-allocation initialization. + * Verifier guarantees we're dealing with an array class. Current + * code throws runtime exception "bad Filled array req" for 'D' and 'J'. + * Current code also throws internal unimp if not 'L', '[' or 'I'. + */ +void Mir2Lir::GenFilledNewArray(CallInfo* info) +{ + int elems = info->num_arg_words; + int type_idx = info->index; + FlushAllRegs(); /* Everything to home location */ + int func_offset; + if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, + type_idx)) { + func_offset = ENTRYPOINT_OFFSET(pCheckAndAllocArrayFromCode); + } else { + func_offset = ENTRYPOINT_OFFSET(pCheckAndAllocArrayFromCodeWithAccessCheck); + } + CallRuntimeHelperImmMethodImm(func_offset, type_idx, elems, true); + FreeTemp(TargetReg(kArg2)); + FreeTemp(TargetReg(kArg1)); + /* + * NOTE: the implicit target for Instruction::FILLED_NEW_ARRAY is the + * return region. Because AllocFromCode placed the new array + * in kRet0, we'll just lock it into place. When debugger support is + * added, it may be necessary to additionally copy all return + * values to a home location in thread-local storage + */ + LockTemp(TargetReg(kRet0)); + + // TODO: use the correct component size, currently all supported types + // share array alignment with ints (see comment at head of function) + size_t component_size = sizeof(int32_t); + + // Having a range of 0 is legal + if (info->is_range && (elems > 0)) { + /* + * Bit of ugliness here. We're going generate a mem copy loop + * on the register range, but it is possible that some regs + * in the range have been promoted. This is unlikely, but + * before generating the copy, we'll just force a flush + * of any regs in the source range that have been promoted to + * home location. + */ + for (int i = 0; i < elems; i++) { + RegLocation loc = UpdateLoc(info->args[i]); + if (loc.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), + loc.low_reg, kWord); + } + } + /* + * TUNING note: generated code here could be much improved, but + * this is an uncommon operation and isn't especially performance + * critical. 
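
// -- [editor's illustrative sketch; not part of this patch] -------------------
// For the register-range case below, the generated code is a simple backward
// copy loop over 4-byte words: load, store, decrement the index, and branch
// while it is still >= 0 (OpDecAndBranch(kCondGe, ...)).  In plain C++ the
// emitted loop is equivalent to:
#include <cstdint>

static void CopyRangeBackwards(const int32_t* src, int32_t* dst, int elems) {
  for (int idx = elems - 1; idx >= 0; idx--) {  // loop counter starts at elems - 1
    dst[idx] = src[idx];                        // LoadBaseIndexed / StoreBaseIndexed, 4-byte words
  }
}
// -----------------------------------------------------------------------------
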
+ */ + int r_src = AllocTemp(); + int r_dst = AllocTemp(); + int r_idx = AllocTemp(); + int r_val = INVALID_REG; + switch(cu_->instruction_set) { + case kThumb2: + r_val = TargetReg(kLr); + break; + case kX86: + FreeTemp(TargetReg(kRet0)); + r_val = AllocTemp(); + break; + case kMips: + r_val = AllocTemp(); + break; + default: LOG(FATAL) << "Unexpected instruction set: " << cu_->instruction_set; + } + // Set up source pointer + RegLocation rl_first = info->args[0]; + OpRegRegImm(kOpAdd, r_src, TargetReg(kSp), SRegOffset(rl_first.s_reg_low)); + // Set up the target pointer + OpRegRegImm(kOpAdd, r_dst, TargetReg(kRet0), + mirror::Array::DataOffset(component_size).Int32Value()); + // Set up the loop counter (known to be > 0) + LoadConstant(r_idx, elems - 1); + // Generate the copy loop. Going backwards for convenience + LIR* target = NewLIR0(kPseudoTargetLabel); + // Copy next element + LoadBaseIndexed(r_src, r_idx, r_val, 2, kWord); + StoreBaseIndexed(r_dst, r_idx, r_val, 2, kWord); + FreeTemp(r_val); + OpDecAndBranch(kCondGe, r_idx, target); + if (cu_->instruction_set == kX86) { + // Restore the target pointer + OpRegRegImm(kOpAdd, TargetReg(kRet0), r_dst, + -mirror::Array::DataOffset(component_size).Int32Value()); + } + } else if (!info->is_range) { + // TUNING: interleave + for (int i = 0; i < elems; i++) { + RegLocation rl_arg = LoadValue(info->args[i], kCoreReg); + StoreBaseDisp(TargetReg(kRet0), + mirror::Array::DataOffset(component_size).Int32Value() + + i * 4, rl_arg.low_reg, kWord); + // If the LoadValue caused a temp to be allocated, free it + if (IsTemp(rl_arg.low_reg)) { + FreeTemp(rl_arg.low_reg); + } + } + } + if (info->result.location != kLocInvalid) { + StoreValue(info->result, GetReturn(false /* not fp */)); + } +} + +void Mir2Lir::GenSput(uint32_t field_idx, RegLocation rl_src, bool is_long_or_double, + bool is_object) +{ + int field_offset; + int ssb_index; + bool is_volatile; + bool is_referrers_class; + bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo( + field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, ssb_index, + is_referrers_class, is_volatile, true); + if (fast_path && !SLOW_FIELD_PATH) { + DCHECK_GE(field_offset, 0); + int rBase; + if (is_referrers_class) { + // Fast path, static storage base is this method's class + RegLocation rl_method = LoadCurrMethod(); + rBase = AllocTemp(); + LoadWordDisp(rl_method.low_reg, + mirror::AbstractMethod::DeclaringClassOffset().Int32Value(), rBase); + if (IsTemp(rl_method.low_reg)) { + FreeTemp(rl_method.low_reg); + } + } else { + // Medium path, static storage base in a different class which requires checks that the other + // class is initialized. + // TODO: remove initialized check now that we are initializing classes in the compiler driver. + DCHECK_GE(ssb_index, 0); + // May do runtime call so everything to home locations. + FlushAllRegs(); + // Using fixed register to sync with possible call to runtime support. + int r_method = TargetReg(kArg1); + LockTemp(r_method); + LoadCurrMethodDirect(r_method); + rBase = TargetReg(kArg0); + LockTemp(rBase); + LoadWordDisp(r_method, + mirror::AbstractMethod::DexCacheInitializedStaticStorageOffset().Int32Value(), + rBase); + LoadWordDisp(rBase, + mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + + sizeof(int32_t*) * ssb_index, rBase); + // rBase now points at appropriate static storage base (Class*) + // or NULL if not initialized. Check for NULL and call helper if NULL. 
+ // TUNING: fast path should fall through + LIR* branch_over = OpCmpImmBranch(kCondNe, rBase, 0, NULL); + LoadConstant(TargetReg(kArg0), ssb_index); + CallRuntimeHelperImm(ENTRYPOINT_OFFSET(pInitializeStaticStorage), ssb_index, true); + if (cu_->instruction_set == kMips) { + // For Arm, kRet0 = kArg0 = rBase, for Mips, we need to copy + OpRegCopy(rBase, TargetReg(kRet0)); + } + LIR* skip_target = NewLIR0(kPseudoTargetLabel); + branch_over->target = skip_target; + FreeTemp(r_method); + } + // rBase now holds static storage base + if (is_long_or_double) { + rl_src = LoadValueWide(rl_src, kAnyReg); + } else { + rl_src = LoadValue(rl_src, kAnyReg); + } + if (is_volatile) { + GenMemBarrier(kStoreStore); + } + if (is_long_or_double) { + StoreBaseDispWide(rBase, field_offset, rl_src.low_reg, + rl_src.high_reg); + } else { + StoreWordDisp(rBase, field_offset, rl_src.low_reg); + } + if (is_volatile) { + GenMemBarrier(kStoreLoad); + } + if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) { + MarkGCCard(rl_src.low_reg, rBase); + } + FreeTemp(rBase); + } else { + FlushAllRegs(); // Everything to home locations + int setter_offset = is_long_or_double ? ENTRYPOINT_OFFSET(pSet64Static) : + (is_object ? ENTRYPOINT_OFFSET(pSetObjStatic) + : ENTRYPOINT_OFFSET(pSet32Static)); + CallRuntimeHelperImmRegLocation(setter_offset, field_idx, rl_src, true); + } +} + +void Mir2Lir::GenSget(uint32_t field_idx, RegLocation rl_dest, + bool is_long_or_double, bool is_object) +{ + int field_offset; + int ssb_index; + bool is_volatile; + bool is_referrers_class; + bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo( + field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, ssb_index, + is_referrers_class, is_volatile, false); + if (fast_path && !SLOW_FIELD_PATH) { + DCHECK_GE(field_offset, 0); + int rBase; + if (is_referrers_class) { + // Fast path, static storage base is this method's class + RegLocation rl_method = LoadCurrMethod(); + rBase = AllocTemp(); + LoadWordDisp(rl_method.low_reg, + mirror::AbstractMethod::DeclaringClassOffset().Int32Value(), rBase); + } else { + // Medium path, static storage base in a different class which requires checks that the other + // class is initialized + // TODO: remove initialized check now that we are initializing classes in the compiler driver. + DCHECK_GE(ssb_index, 0); + // May do runtime call so everything to home locations. + FlushAllRegs(); + // Using fixed register to sync with possible call to runtime support. + int r_method = TargetReg(kArg1); + LockTemp(r_method); + LoadCurrMethodDirect(r_method); + rBase = TargetReg(kArg0); + LockTemp(rBase); + LoadWordDisp(r_method, + mirror::AbstractMethod::DexCacheInitializedStaticStorageOffset().Int32Value(), + rBase); + LoadWordDisp(rBase, mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + + sizeof(int32_t*) * ssb_index, rBase); + // rBase now points at appropriate static storage base (Class*) + // or NULL if not initialized. Check for NULL and call helper if NULL. 
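
// -- [editor's illustrative sketch; not part of this patch] -------------------
// The "medium path" in GenSput() above and GenSget() here treats the static
// storage base as a lazily initialized cache slot: load the Class* from the
// dex cache, and only if it is still null call the runtime helper that
// initializes the class and returns the base.  The control flow being emitted,
// with hypothetical types and names:
struct Class;  // stand-in for mirror::Class

static Class* GetStaticStorageBase(Class** ssb_cache, int ssb_index,
                                   Class* (*initialize_static_storage)(int)) {
  Class* base = ssb_cache[ssb_index];              // LoadWordDisp of the cached slot
  if (base == nullptr) {                           // OpCmpImmBranch(kCondNe, ...) skips the call
    base = initialize_static_storage(ssb_index);   // slow path: runtime entrypoint
  }
  return base;                                     // rBase now holds the storage base
}
// -----------------------------------------------------------------------------
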
+ // TUNING: fast path should fall through + LIR* branch_over = OpCmpImmBranch(kCondNe, rBase, 0, NULL); + CallRuntimeHelperImm(ENTRYPOINT_OFFSET(pInitializeStaticStorage), ssb_index, true); + if (cu_->instruction_set == kMips) { + // For Arm, kRet0 = kArg0 = rBase, for Mips, we need to copy + OpRegCopy(rBase, TargetReg(kRet0)); + } + LIR* skip_target = NewLIR0(kPseudoTargetLabel); + branch_over->target = skip_target; + FreeTemp(r_method); + } + // rBase now holds static storage base + RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true); + if (is_volatile) { + GenMemBarrier(kLoadLoad); + } + if (is_long_or_double) { + LoadBaseDispWide(rBase, field_offset, rl_result.low_reg, + rl_result.high_reg, INVALID_SREG); + } else { + LoadWordDisp(rBase, field_offset, rl_result.low_reg); + } + FreeTemp(rBase); + if (is_long_or_double) { + StoreValueWide(rl_dest, rl_result); + } else { + StoreValue(rl_dest, rl_result); + } + } else { + FlushAllRegs(); // Everything to home locations + int getterOffset = is_long_or_double ? ENTRYPOINT_OFFSET(pGet64Static) : + (is_object ? ENTRYPOINT_OFFSET(pGetObjStatic) + : ENTRYPOINT_OFFSET(pGet32Static)); + CallRuntimeHelperImm(getterOffset, field_idx, true); + if (is_long_or_double) { + RegLocation rl_result = GetReturnWide(rl_dest.fp); + StoreValueWide(rl_dest, rl_result); + } else { + RegLocation rl_result = GetReturn(rl_dest.fp); + StoreValue(rl_dest, rl_result); + } + } +} + +void Mir2Lir::HandleSuspendLaunchPads() +{ + int num_elems = suspend_launchpads_.Size(); + int helper_offset = ENTRYPOINT_OFFSET(pTestSuspendFromCode); + for (int i = 0; i < num_elems; i++) { + ResetRegPool(); + ResetDefTracking(); + LIR* lab = suspend_launchpads_.Get(i); + LIR* resume_lab = reinterpret_cast<LIR*>(lab->operands[0]); + current_dalvik_offset_ = lab->operands[1]; + AppendLIR(lab); + int r_tgt = CallHelperSetup(helper_offset); + CallHelper(r_tgt, helper_offset, true /* MarkSafepointPC */); + OpUnconditionalBranch(resume_lab); + } +} + +void Mir2Lir::HandleIntrinsicLaunchPads() +{ + int num_elems = intrinsic_launchpads_.Size(); + for (int i = 0; i < num_elems; i++) { + ResetRegPool(); + ResetDefTracking(); + LIR* lab = intrinsic_launchpads_.Get(i); + CallInfo* info = reinterpret_cast<CallInfo*>(lab->operands[0]); + current_dalvik_offset_ = info->offset; + AppendLIR(lab); + // NOTE: GenInvoke handles MarkSafepointPC + GenInvoke(info); + LIR* resume_lab = reinterpret_cast<LIR*>(lab->operands[2]); + if (resume_lab != NULL) { + OpUnconditionalBranch(resume_lab); + } + } +} + +void Mir2Lir::HandleThrowLaunchPads() +{ + int num_elems = throw_launchpads_.Size(); + for (int i = 0; i < num_elems; i++) { + ResetRegPool(); + ResetDefTracking(); + LIR* lab = throw_launchpads_.Get(i); + current_dalvik_offset_ = lab->operands[1]; + AppendLIR(lab); + int func_offset = 0; + int v1 = lab->operands[2]; + int v2 = lab->operands[3]; + bool target_x86 = (cu_->instruction_set == kX86); + switch (lab->operands[0]) { + case kThrowNullPointer: + func_offset = ENTRYPOINT_OFFSET(pThrowNullPointerFromCode); + break; + case kThrowConstantArrayBounds: // v1 is length reg (for Arm/Mips), v2 constant index + // v1 holds the constant array index. Mips/Arm uses v2 for length, x86 reloads. + if (target_x86) { + OpRegMem(kOpMov, TargetReg(kArg1), v1, mirror::Array::LengthOffset().Int32Value()); + } else { + OpRegCopy(TargetReg(kArg1), v1); + } + // Make sure the following LoadConstant doesn't mess with kArg1. 
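
// -- [editor's illustrative sketch; not part of this patch] -------------------
// For volatile fields, GenSput()/GenSget() above and GenIGet()/GenIPut() below
// bracket the access with memory barriers: kStoreStore before a volatile
// store, kStoreLoad after it, and kLoadLoad after a volatile load.  As a rough
// C++11 fence analogy only (the actual Java-memory-model mapping is defined by
// the runtime, not by this sketch):
#include <atomic>

static void VolatileStoreAnalogy(std::atomic<int>* field, int value) {
  std::atomic_thread_fence(std::memory_order_release);  // ~ kStoreStore before the store
  field->store(value, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // ~ kStoreLoad after the store
}

static int VolatileLoadAnalogy(const std::atomic<int>* field) {
  int value = field->load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire);  // ~ kLoadLoad after the load
  return value;
}
// -----------------------------------------------------------------------------
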
+ LockTemp(TargetReg(kArg1)); + LoadConstant(TargetReg(kArg0), v2); + func_offset = ENTRYPOINT_OFFSET(pThrowArrayBoundsFromCode); + break; + case kThrowArrayBounds: + // Move v1 (array index) to kArg0 and v2 (array length) to kArg1 + if (v2 != TargetReg(kArg0)) { + OpRegCopy(TargetReg(kArg0), v1); + if (target_x86) { + // x86 leaves the array pointer in v2, so load the array length that the handler expects + OpRegMem(kOpMov, TargetReg(kArg1), v2, mirror::Array::LengthOffset().Int32Value()); + } else { + OpRegCopy(TargetReg(kArg1), v2); + } + } else { + if (v1 == TargetReg(kArg1)) { + // Swap v1 and v2, using kArg2 as a temp + OpRegCopy(TargetReg(kArg2), v1); + if (target_x86) { + // x86 leaves the array pointer in v2; load the array length that the handler expects + OpRegMem(kOpMov, TargetReg(kArg1), v2, mirror::Array::LengthOffset().Int32Value()); + } else { + OpRegCopy(TargetReg(kArg1), v2); + } + OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); + } else { + if (target_x86) { + // x86 leaves the array pointer in v2; load the array length that the handler expects + OpRegMem(kOpMov, TargetReg(kArg1), v2, mirror::Array::LengthOffset().Int32Value()); + } else { + OpRegCopy(TargetReg(kArg1), v2); + } + OpRegCopy(TargetReg(kArg0), v1); + } + } + func_offset = ENTRYPOINT_OFFSET(pThrowArrayBoundsFromCode); + break; + case kThrowDivZero: + func_offset = ENTRYPOINT_OFFSET(pThrowDivZeroFromCode); + break; + case kThrowNoSuchMethod: + OpRegCopy(TargetReg(kArg0), v1); + func_offset = + ENTRYPOINT_OFFSET(pThrowNoSuchMethodFromCode); + break; + case kThrowStackOverflow: + func_offset = ENTRYPOINT_OFFSET(pThrowStackOverflowFromCode); + // Restore stack alignment + if (target_x86) { + OpRegImm(kOpAdd, TargetReg(kSp), frame_size_); + } else { + OpRegImm(kOpAdd, TargetReg(kSp), (num_core_spills_ + num_fp_spills_) * 4); + } + break; + default: + LOG(FATAL) << "Unexpected throw kind: " << lab->operands[0]; + } + ClobberCalleeSave(); + int r_tgt = CallHelperSetup(func_offset); + CallHelper(r_tgt, func_offset, true /* MarkSafepointPC */); + } +} + +void Mir2Lir::GenIGet(uint32_t field_idx, int opt_flags, OpSize size, + RegLocation rl_dest, RegLocation rl_obj, bool is_long_or_double, + bool is_object) +{ + int field_offset; + bool is_volatile; + + bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false); + + if (fast_path && !SLOW_FIELD_PATH) { + RegLocation rl_result; + RegisterClass reg_class = oat_reg_class_by_size(size); + DCHECK_GE(field_offset, 0); + rl_obj = LoadValue(rl_obj, kCoreReg); + if (is_long_or_double) { + DCHECK(rl_dest.wide); + GenNullCheck(rl_obj.s_reg_low, rl_obj.low_reg, opt_flags); + if (cu_->instruction_set == kX86) { + rl_result = EvalLoc(rl_dest, reg_class, true); + GenNullCheck(rl_obj.s_reg_low, rl_obj.low_reg, opt_flags); + LoadBaseDispWide(rl_obj.low_reg, field_offset, rl_result.low_reg, + rl_result.high_reg, rl_obj.s_reg_low); + if (is_volatile) { + GenMemBarrier(kLoadLoad); + } + } else { + int reg_ptr = AllocTemp(); + OpRegRegImm(kOpAdd, reg_ptr, rl_obj.low_reg, field_offset); + rl_result = EvalLoc(rl_dest, reg_class, true); + LoadBaseDispWide(reg_ptr, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG); + if (is_volatile) { + GenMemBarrier(kLoadLoad); + } + FreeTemp(reg_ptr); + } + StoreValueWide(rl_dest, rl_result); + } else { + rl_result = EvalLoc(rl_dest, reg_class, true); + GenNullCheck(rl_obj.s_reg_low, rl_obj.low_reg, opt_flags); + LoadBaseDisp(rl_obj.low_reg, field_offset, rl_result.low_reg, + kWord, rl_obj.s_reg_low); + if (is_volatile) { + 
GenMemBarrier(kLoadLoad); + } + StoreValue(rl_dest, rl_result); + } + } else { + int getterOffset = is_long_or_double ? ENTRYPOINT_OFFSET(pGet64Instance) : + (is_object ? ENTRYPOINT_OFFSET(pGetObjInstance) + : ENTRYPOINT_OFFSET(pGet32Instance)); + CallRuntimeHelperImmRegLocation(getterOffset, field_idx, rl_obj, true); + if (is_long_or_double) { + RegLocation rl_result = GetReturnWide(rl_dest.fp); + StoreValueWide(rl_dest, rl_result); + } else { + RegLocation rl_result = GetReturn(rl_dest.fp); + StoreValue(rl_dest, rl_result); + } + } +} + +void Mir2Lir::GenIPut(uint32_t field_idx, int opt_flags, OpSize size, + RegLocation rl_src, RegLocation rl_obj, bool is_long_or_double, + bool is_object) +{ + int field_offset; + bool is_volatile; + + bool fast_path = FastInstance(field_idx, field_offset, is_volatile, + true); + if (fast_path && !SLOW_FIELD_PATH) { + RegisterClass reg_class = oat_reg_class_by_size(size); + DCHECK_GE(field_offset, 0); + rl_obj = LoadValue(rl_obj, kCoreReg); + if (is_long_or_double) { + int reg_ptr; + rl_src = LoadValueWide(rl_src, kAnyReg); + GenNullCheck(rl_obj.s_reg_low, rl_obj.low_reg, opt_flags); + reg_ptr = AllocTemp(); + OpRegRegImm(kOpAdd, reg_ptr, rl_obj.low_reg, field_offset); + if (is_volatile) { + GenMemBarrier(kStoreStore); + } + StoreBaseDispWide(reg_ptr, 0, rl_src.low_reg, rl_src.high_reg); + if (is_volatile) { + GenMemBarrier(kLoadLoad); + } + FreeTemp(reg_ptr); + } else { + rl_src = LoadValue(rl_src, reg_class); + GenNullCheck(rl_obj.s_reg_low, rl_obj.low_reg, opt_flags); + if (is_volatile) { + GenMemBarrier(kStoreStore); + } + StoreBaseDisp(rl_obj.low_reg, field_offset, rl_src.low_reg, kWord); + if (is_volatile) { + GenMemBarrier(kLoadLoad); + } + if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) { + MarkGCCard(rl_src.low_reg, rl_obj.low_reg); + } + } + } else { + int setter_offset = is_long_or_double ? ENTRYPOINT_OFFSET(pSet64Instance) : + (is_object ? ENTRYPOINT_OFFSET(pSetObjInstance) + : ENTRYPOINT_OFFSET(pSet32Instance)); + CallRuntimeHelperImmRegLocationRegLocation(setter_offset, field_idx, rl_obj, rl_src, true); + } +} + +void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) +{ + RegLocation rl_method = LoadCurrMethod(); + int res_reg = AllocTemp(); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, + *cu_->dex_file, + type_idx)) { + // Call out to helper which resolves type and verifies access. + // Resolved type returned in kRet0. 
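
// -- [editor's illustrative sketch; not part of this patch] -------------------
// GenConstClass() (starting just above) and GenConstString() below both index
// a dex-cache object array by computing "data offset + element size * index"
// and issuing a single word load from that byte offset.  The arithmetic,
// written out with the data offset passed in as a parameter:
#include <cstddef>
#include <cstdint>

static int32_t DexCacheSlotOffset(int32_t data_offset, size_t element_size, uint32_t index) {
  // data_offset is where element storage begins inside the array object
  // (mirror::Array::DataOffset(element_size) in the code around this sketch).
  return data_offset + static_cast<int32_t>(element_size * index);
}
// -----------------------------------------------------------------------------
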
+ CallRuntimeHelperImmReg(ENTRYPOINT_OFFSET(pInitializeTypeAndVerifyAccessFromCode), + type_idx, rl_method.low_reg, true); + RegLocation rl_result = GetReturn(false); + StoreValue(rl_dest, rl_result); + } else { + // We don't need access checks, load type from dex cache + int32_t dex_cache_offset = + mirror::AbstractMethod::DexCacheResolvedTypesOffset().Int32Value(); + LoadWordDisp(rl_method.low_reg, dex_cache_offset, res_reg); + int32_t offset_of_type = + mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*) + * type_idx); + LoadWordDisp(res_reg, offset_of_type, rl_result.low_reg); + if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, + type_idx) || SLOW_TYPE_PATH) { + // Slow path, at runtime test if type is null and if so initialize + FlushAllRegs(); + LIR* branch1 = OpCmpImmBranch(kCondEq, rl_result.low_reg, 0, NULL); + // Resolved, store and hop over following code + StoreValue(rl_dest, rl_result); + /* + * Because we have stores of the target value on two paths, + * clobber temp tracking for the destination using the ssa name + */ + ClobberSReg(rl_dest.s_reg_low); + LIR* branch2 = OpUnconditionalBranch(0); + // TUNING: move slow path to end & remove unconditional branch + LIR* target1 = NewLIR0(kPseudoTargetLabel); + // Call out to helper, which will return resolved type in kArg0 + CallRuntimeHelperImmReg(ENTRYPOINT_OFFSET(pInitializeTypeFromCode), type_idx, + rl_method.low_reg, true); + RegLocation rl_result = GetReturn(false); + StoreValue(rl_dest, rl_result); + /* + * Because we have stores of the target value on two paths, + * clobber temp tracking for the destination using the ssa name + */ + ClobberSReg(rl_dest.s_reg_low); + // Rejoin code paths + LIR* target2 = NewLIR0(kPseudoTargetLabel); + branch1->target = target1; + branch2->target = target2; + } else { + // Fast path, we're done - just store result + StoreValue(rl_dest, rl_result); + } + } +} + +void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) +{ + /* NOTE: Most strings should be available at compile time */ + int32_t offset_of_string = mirror::Array::DataOffset(sizeof(mirror::String*)).Int32Value() + + (sizeof(mirror::String*) * string_idx); + if (!cu_->compiler_driver->CanAssumeStringIsPresentInDexCache( + *cu_->dex_file, string_idx) || SLOW_STRING_PATH) { + // slow path, resolve string if not in dex cache + FlushAllRegs(); + LockCallTemps(); // Using explicit registers + LoadCurrMethodDirect(TargetReg(kArg2)); + LoadWordDisp(TargetReg(kArg2), + mirror::AbstractMethod::DexCacheStringsOffset().Int32Value(), TargetReg(kArg0)); + // Might call out to helper, which will return resolved string in kRet0 + int r_tgt = CallHelperSetup(ENTRYPOINT_OFFSET(pResolveStringFromCode)); + LoadWordDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0)); + LoadConstant(TargetReg(kArg1), string_idx); + if (cu_->instruction_set == kThumb2) { + OpRegImm(kOpCmp, TargetReg(kRet0), 0); // Is resolved?
+ GenBarrier(); + // For testing, always force through helper + if (!EXERCISE_SLOWEST_STRING_PATH) { + OpIT(kCondEq, "T"); + } + OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); // .eq + LIR* call_inst = OpReg(kOpBlx, r_tgt); // .eq, helper(Method*, string_idx) + MarkSafepointPC(call_inst); + FreeTemp(r_tgt); + } else if (cu_->instruction_set == kMips) { + LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL); + OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); // .eq + LIR* call_inst = OpReg(kOpBlx, r_tgt); + MarkSafepointPC(call_inst); + FreeTemp(r_tgt); + LIR* target = NewLIR0(kPseudoTargetLabel); + branch->target = target; + } else { + DCHECK_EQ(cu_->instruction_set, kX86); + CallRuntimeHelperRegReg(ENTRYPOINT_OFFSET(pResolveStringFromCode), TargetReg(kArg2), TargetReg(kArg1), true); + } + GenBarrier(); + StoreValue(rl_dest, GetReturn(false)); + } else { + RegLocation rl_method = LoadCurrMethod(); + int res_reg = AllocTemp(); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + LoadWordDisp(rl_method.low_reg, + mirror::AbstractMethod::DexCacheStringsOffset().Int32Value(), res_reg); + LoadWordDisp(res_reg, offset_of_string, rl_result.low_reg); + StoreValue(rl_dest, rl_result); + } +} + +/* + * Let helper function take care of everything. Will + * call Class::NewInstanceFromCode(type_idx, method); + */ +void Mir2Lir::GenNewInstance(uint32_t type_idx, RegLocation rl_dest) +{ + FlushAllRegs(); /* Everything to home location */ + // alloc will always check for resolution, do we also need to verify + // access because the verifier was unable to? + int func_offset; + if (cu_->compiler_driver->CanAccessInstantiableTypeWithoutChecks( + cu_->method_idx, *cu_->dex_file, type_idx)) { + func_offset = ENTRYPOINT_OFFSET(pAllocObjectFromCode); + } else { + func_offset = ENTRYPOINT_OFFSET(pAllocObjectFromCodeWithAccessCheck); + } + CallRuntimeHelperImmMethod(func_offset, type_idx, true); + RegLocation rl_result = GetReturn(false); + StoreValue(rl_dest, rl_result); +} + +void Mir2Lir::GenThrow(RegLocation rl_src) +{ + FlushAllRegs(); + CallRuntimeHelperRegLocation(ENTRYPOINT_OFFSET(pDeliverException), rl_src, true); +} + +// For final classes there are no sub-classes to check and so we can answer the instance-of +// question with simple comparisons. 
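
// -- [editor's illustrative sketch; not part of this patch] -------------------
// As the comment above says, for a final class instance-of needs no subclass
// walk: it is a null check plus a single class-pointer comparison, which is
// the shape GenInstanceofFinal() below emits.  In plain C++ terms, with
// hypothetical Object/Class types:
struct Class;
struct Object {
  Class* klass;  // stand-in for object->klass_
};

static bool InstanceOfFinalClass(const Object* obj, const Class* resolved_class) {
  return obj != nullptr && obj->klass == resolved_class;
}
// -----------------------------------------------------------------------------
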
+void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest, + RegLocation rl_src) { + RegLocation object = LoadValue(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + int result_reg = rl_result.low_reg; + if (result_reg == object.low_reg) { + result_reg = AllocTypedTemp(false, kCoreReg); + } + LoadConstant(result_reg, 0); // assume false + LIR* null_branchover = OpCmpImmBranch(kCondEq, object.low_reg, 0, NULL); + + int check_class = AllocTypedTemp(false, kCoreReg); + int object_class = AllocTypedTemp(false, kCoreReg); + + LoadCurrMethodDirect(check_class); + if (use_declaring_class) { + LoadWordDisp(check_class, mirror::AbstractMethod::DeclaringClassOffset().Int32Value(), + check_class); + LoadWordDisp(object.low_reg, mirror::Object::ClassOffset().Int32Value(), object_class); + } else { + LoadWordDisp(check_class, mirror::AbstractMethod::DexCacheResolvedTypesOffset().Int32Value(), + check_class); + LoadWordDisp(object.low_reg, mirror::Object::ClassOffset().Int32Value(), object_class); + int32_t offset_of_type = + mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + + (sizeof(mirror::Class*) * type_idx); + LoadWordDisp(check_class, offset_of_type, check_class); + } + + LIR* ne_branchover = NULL; + if (cu_->instruction_set == kThumb2) { + OpRegReg(kOpCmp, check_class, object_class); // Same? + OpIT(kCondEq, ""); // if-convert the test + LoadConstant(result_reg, 1); // .eq case - load true + } else { + ne_branchover = OpCmpBranch(kCondNe, check_class, object_class, NULL); + LoadConstant(result_reg, 1); // eq case - load true + } + LIR* target = NewLIR0(kPseudoTargetLabel); + null_branchover->target = target; + if (ne_branchover != NULL) { + ne_branchover->target = target; + } + FreeTemp(object_class); + FreeTemp(check_class); + if (IsTemp(result_reg)) { + OpRegCopy(rl_result.low_reg, result_reg); + FreeTemp(result_reg); + } + StoreValue(rl_dest, rl_result); +} + +void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final, + bool type_known_abstract, bool use_declaring_class, + bool can_assume_type_is_in_dex_cache, + uint32_t type_idx, RegLocation rl_dest, + RegLocation rl_src) { + FlushAllRegs(); + // May generate a call - use explicit registers + LockCallTemps(); + LoadCurrMethodDirect(TargetReg(kArg1)); // kArg1 <= current Method* + int class_reg = TargetReg(kArg2); // kArg2 will hold the Class* + if (needs_access_check) { + // Check we have access to type_idx and if not throw IllegalAccessError, + // returns Class* in kArg0 + CallRuntimeHelperImm(ENTRYPOINT_OFFSET(pInitializeTypeAndVerifyAccessFromCode), + type_idx, true); + OpRegCopy(class_reg, TargetReg(kRet0)); // Align usage with fast path + LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref + } else if (use_declaring_class) { + LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref + LoadWordDisp(TargetReg(kArg1), + mirror::AbstractMethod::DeclaringClassOffset().Int32Value(), class_reg); + } else { + // Load dex cache entry into class_reg (kArg2) + LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref + LoadWordDisp(TargetReg(kArg1), + mirror::AbstractMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg); + int32_t offset_of_type = + mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*) + * type_idx); + LoadWordDisp(class_reg, offset_of_type, class_reg); + if (!can_assume_type_is_in_dex_cache) { + // Need to test presence of type in dex cache at 
runtime + LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL); + // Not resolved + // Call out to helper, which will return resolved type in kRet0 + CallRuntimeHelperImm(ENTRYPOINT_OFFSET(pInitializeTypeFromCode), type_idx, true); + OpRegCopy(TargetReg(kArg2), TargetReg(kRet0)); // Align usage with fast path + LoadValueDirectFixed(rl_src, TargetReg(kArg0)); /* reload Ref */ + // Rejoin code paths + LIR* hop_target = NewLIR0(kPseudoTargetLabel); + hop_branch->target = hop_target; + } + } + /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */ + RegLocation rl_result = GetReturn(false); + if (cu_->instruction_set == kMips) { + // On MIPS rArg0 != rl_result, place false in result if branch is taken. + LoadConstant(rl_result.low_reg, 0); + } + LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL); + + /* load object->klass_ */ + DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0); + LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1)); + /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class */ + LIR* branchover = NULL; + if (type_known_final) { + // rl_result == ref == null == 0. + if (cu_->instruction_set == kThumb2) { + OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2)); // Same? + OpIT(kCondEq, "E"); // if-convert the test + LoadConstant(rl_result.low_reg, 1); // .eq case - load true + LoadConstant(rl_result.low_reg, 0); // .ne case - load false + } else { + LoadConstant(rl_result.low_reg, 0); // ne case - load false + branchover = OpCmpBranch(kCondNe, TargetReg(kArg1), TargetReg(kArg2), NULL); + LoadConstant(rl_result.low_reg, 1); // eq case - load true + } + } else { + if (cu_->instruction_set == kThumb2) { + int r_tgt = LoadHelper(ENTRYPOINT_OFFSET(pInstanceofNonTrivialFromCode)); + if (!type_known_abstract) { + /* Uses conditional nullification */ + OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2)); // Same? + OpIT(kCondEq, "EE"); // if-convert the test + LoadConstant(TargetReg(kArg0), 1); // .eq case - load true + } + OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); // .ne case - arg0 <= class + OpReg(kOpBlx, r_tgt); // .ne case: helper(class, ref->class) + FreeTemp(r_tgt); + } else { + if (!type_known_abstract) { + /* Uses branchovers */ + LoadConstant(rl_result.low_reg, 1); // assume true + branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL); + } + if (cu_->instruction_set != kX86) { + int r_tgt = LoadHelper(ENTRYPOINT_OFFSET(pInstanceofNonTrivialFromCode)); + OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); // .ne case - arg0 <= class + OpReg(kOpBlx, r_tgt); // .ne case: helper(class, ref->class) + FreeTemp(r_tgt); + } else { + OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); + OpThreadMem(kOpBlx, ENTRYPOINT_OFFSET(pInstanceofNonTrivialFromCode)); + } + } + } + // TODO: only clobber when type isn't final? 
+ ClobberCalleeSave(); + /* branch targets here */ + LIR* target = NewLIR0(kPseudoTargetLabel); + StoreValue(rl_dest, rl_result); + branch1->target = target; + if (branchover != NULL) { + branchover->target = target; + } +} + +void Mir2Lir::GenInstanceof(uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src) { + bool type_known_final, type_known_abstract, use_declaring_class; + bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, + *cu_->dex_file, + type_idx, + &type_known_final, + &type_known_abstract, + &use_declaring_class); + bool can_assume_type_is_in_dex_cache = !needs_access_check && + cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx); + + if ((use_declaring_class || can_assume_type_is_in_dex_cache) && type_known_final) { + GenInstanceofFinal(use_declaring_class, type_idx, rl_dest, rl_src); + } else { + GenInstanceofCallingHelper(needs_access_check, type_known_final, type_known_abstract, + use_declaring_class, can_assume_type_is_in_dex_cache, + type_idx, rl_dest, rl_src); + } +} + +void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src) +{ + bool type_known_final, type_known_abstract, use_declaring_class; + bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, + *cu_->dex_file, + type_idx, + &type_known_final, + &type_known_abstract, + &use_declaring_class); + // Note: currently type_known_final is unused, as optimizing will only improve the performance + // of the exception throw path. + DexCompilationUnit* cu = mir_graph_->GetCurrentDexCompilationUnit(); + const MethodReference mr(cu->GetDexFile(), cu->GetDexMethodIndex()); + if (!needs_access_check && cu_->compiler_driver->IsSafeCast(mr, insn_idx)) { + // Verifier type analysis proved this check cast would never cause an exception. 
+ return; + } + FlushAllRegs(); + // May generate a call - use explicit registers + LockCallTemps(); + LoadCurrMethodDirect(TargetReg(kArg1)); // kArg1 <= current Method* + int class_reg = TargetReg(kArg2); // kArg2 will hold the Class* + if (needs_access_check) { + // Check we have access to type_idx and if not throw IllegalAccessError, + // returns Class* in kRet0 + // InitializeTypeAndVerifyAccess(idx, method) + CallRuntimeHelperImmReg(ENTRYPOINT_OFFSET(pInitializeTypeAndVerifyAccessFromCode), + type_idx, TargetReg(kArg1), true); + OpRegCopy(class_reg, TargetReg(kRet0)); // Align usage with fast path + } else if (use_declaring_class) { + LoadWordDisp(TargetReg(kArg1), + mirror::AbstractMethod::DeclaringClassOffset().Int32Value(), class_reg); + } else { + // Load dex cache entry into class_reg (kArg2) + LoadWordDisp(TargetReg(kArg1), + mirror::AbstractMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg); + int32_t offset_of_type = + mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + + (sizeof(mirror::Class*) * type_idx); + LoadWordDisp(class_reg, offset_of_type, class_reg); + if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) { + // Need to test presence of type in dex cache at runtime + LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL); + // Not resolved + // Call out to helper, which will return resolved type in kArg0 + // InitializeTypeFromCode(idx, method) + CallRuntimeHelperImmReg(ENTRYPOINT_OFFSET(pInitializeTypeFromCode), type_idx, TargetReg(kArg1), + true); + OpRegCopy(class_reg, TargetReg(kRet0)); // Align usage with fast path + // Rejoin code paths + LIR* hop_target = NewLIR0(kPseudoTargetLabel); + hop_branch->target = hop_target; + } + } + // At this point, class_reg (kArg2) has class + LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref + /* Null is OK - continue */ + LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL); + /* load object->klass_ */ + DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0); + LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1)); + /* kArg1 now contains object->klass_ */ + LIR* branch2 = NULL; + if (!type_known_abstract) { + branch2 = OpCmpBranch(kCondEq, TargetReg(kArg1), class_reg, NULL); + } + CallRuntimeHelperRegReg(ENTRYPOINT_OFFSET(pCheckCastFromCode), TargetReg(kArg1), TargetReg(kArg2), + true); + /* branch target here */ + LIR* target = NewLIR0(kPseudoTargetLabel); + branch1->target = target; + if (branch2 != NULL) { + branch2->target = target; + } +} + +void Mir2Lir::GenLong3Addr(OpKind first_op, OpKind second_op, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) +{ + RegLocation rl_result; + if (cu_->instruction_set == kThumb2) { + /* + * NOTE: This is the one place in the code in which we might have + * as many as six live temporary registers. There are 5 in the normal + * set for Arm. Until we have spill capabilities, temporarily add + * lr to the temp set. It is safe to do this locally, but note that + * lr is used explicitly elsewhere in the code generator and cannot + * normally be used as a general temp register. 
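
// -- [editor's illustrative sketch; not part of this patch] -------------------
// GenLong3Addr() (continuing below) builds a 64-bit operation from one op on
// the low halves of the register pairs and one op on the high halves, e.g. an
// add / add-with-carry style sequence.  The long-add decomposition in plain
// C++:
#include <cstdint>

static void AddLongViaPairs(uint32_t src1_lo, uint32_t src1_hi,
                            uint32_t src2_lo, uint32_t src2_hi,
                            uint32_t* dst_lo, uint32_t* dst_hi) {
  uint32_t lo = src1_lo + src2_lo;            // first_op on the low halves
  uint32_t carry = (lo < src1_lo) ? 1u : 0u;  // carry out of the low-half add
  *dst_lo = lo;
  *dst_hi = src1_hi + src2_hi + carry;        // second_op on the high halves
}
// -----------------------------------------------------------------------------
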
+ */ + MarkTemp(TargetReg(kLr)); // Add lr to the temp pool + FreeTemp(TargetReg(kLr)); // and make it available + } + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + // The longs may overlap - use intermediate temp if so + if ((rl_result.low_reg == rl_src1.high_reg) || (rl_result.low_reg == rl_src2.high_reg)){ + int t_reg = AllocTemp(); + OpRegRegReg(first_op, t_reg, rl_src1.low_reg, rl_src2.low_reg); + OpRegRegReg(second_op, rl_result.high_reg, rl_src1.high_reg, rl_src2.high_reg); + OpRegCopy(rl_result.low_reg, t_reg); + FreeTemp(t_reg); + } else { + OpRegRegReg(first_op, rl_result.low_reg, rl_src1.low_reg, rl_src2.low_reg); + OpRegRegReg(second_op, rl_result.high_reg, rl_src1.high_reg, + rl_src2.high_reg); + } + /* + * NOTE: If rl_dest refers to a frame variable in a large frame, the + * following StoreValueWide might need to allocate a temp register. + * To further work around the lack of a spill capability, explicitly + * free any temps from rl_src1 & rl_src2 that aren't still live in rl_result. + * Remove when spill is functional. + */ + FreeRegLocTemps(rl_result, rl_src1); + FreeRegLocTemps(rl_result, rl_src2); + StoreValueWide(rl_dest, rl_result); + if (cu_->instruction_set == kThumb2) { + Clobber(TargetReg(kLr)); + UnmarkTemp(TargetReg(kLr)); // Remove lr from the temp pool + } +} + + +void Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift) +{ + int func_offset = -1; // Make gcc happy + + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + func_offset = ENTRYPOINT_OFFSET(pShlLong); + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + func_offset = ENTRYPOINT_OFFSET(pShrLong); + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + func_offset = ENTRYPOINT_OFFSET(pUshrLong); + break; + default: + LOG(FATAL) << "Unexpected case"; + } + FlushAllRegs(); /* Send everything to home location */ + CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_shift, false); + RegLocation rl_result = GetReturnWide(false); + StoreValueWide(rl_dest, rl_result); +} + + +void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) +{ + OpKind op = kOpBkpt; + bool is_div_rem = false; + bool check_zero = false; + bool unary = false; + RegLocation rl_result; + bool shift_op = false; + switch (opcode) { + case Instruction::NEG_INT: + op = kOpNeg; + unary = true; + break; + case Instruction::NOT_INT: + op = kOpMvn; + unary = true; + break; + case Instruction::ADD_INT: + case Instruction::ADD_INT_2ADDR: + op = kOpAdd; + break; + case Instruction::SUB_INT: + case Instruction::SUB_INT_2ADDR: + op = kOpSub; + break; + case Instruction::MUL_INT: + case Instruction::MUL_INT_2ADDR: + op = kOpMul; + break; + case Instruction::DIV_INT: + case Instruction::DIV_INT_2ADDR: + check_zero = true; + op = kOpDiv; + is_div_rem = true; + break; + /* NOTE: returns in kArg1 */ + case Instruction::REM_INT: + case Instruction::REM_INT_2ADDR: + check_zero = true; + op = kOpRem; + is_div_rem = true; + break; + case Instruction::AND_INT: + case Instruction::AND_INT_2ADDR: + op = kOpAnd; + break; + case Instruction::OR_INT: + case Instruction::OR_INT_2ADDR: + op = kOpOr; + break; + case Instruction::XOR_INT: + case Instruction::XOR_INT_2ADDR: + op = kOpXor; + break; + case Instruction::SHL_INT: + case Instruction::SHL_INT_2ADDR: + 
shift_op = true; + op = kOpLsl; + break; + case Instruction::SHR_INT: + case Instruction::SHR_INT_2ADDR: + shift_op = true; + op = kOpAsr; + break; + case Instruction::USHR_INT: + case Instruction::USHR_INT_2ADDR: + shift_op = true; + op = kOpLsr; + break; + default: + LOG(FATAL) << "Invalid word arith op: " << opcode; + } + if (!is_div_rem) { + if (unary) { + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegReg(op, rl_result.low_reg, rl_src1.low_reg); + } else { + if (shift_op) { + int t_reg = INVALID_REG; + if (cu_->instruction_set == kX86) { + // X86 doesn't require masking and must use ECX + t_reg = TargetReg(kCount); // rCX + LoadValueDirectFixed(rl_src2, t_reg); + } else { + rl_src2 = LoadValue(rl_src2, kCoreReg); + t_reg = AllocTemp(); + OpRegRegImm(kOpAnd, t_reg, rl_src2.low_reg, 31); + } + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegRegReg(op, rl_result.low_reg, rl_src1.low_reg, t_reg); + FreeTemp(t_reg); + } else { + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src2 = LoadValue(rl_src2, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegRegReg(op, rl_result.low_reg, rl_src1.low_reg, rl_src2.low_reg); + } + } + StoreValue(rl_dest, rl_result); + } else { + if (cu_->instruction_set == kMips) { + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src2 = LoadValue(rl_src2, kCoreReg); + if (check_zero) { + GenImmedCheck(kCondEq, rl_src2.low_reg, 0, kThrowDivZero); + } + rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv); + } else { + int func_offset = ENTRYPOINT_OFFSET(pIdivmod); + FlushAllRegs(); /* Send everything to home location */ + LoadValueDirectFixed(rl_src2, TargetReg(kArg1)); + int r_tgt = CallHelperSetup(func_offset); + LoadValueDirectFixed(rl_src1, TargetReg(kArg0)); + if (check_zero) { + GenImmedCheck(kCondEq, TargetReg(kArg1), 0, kThrowDivZero); + } + // NOTE: callout here is not a safepoint + CallHelper(r_tgt, func_offset, false /* not a safepoint */ ); + if (op == kOpDiv) + rl_result = GetReturn(false); + else + rl_result = GetReturnAlt(); + } + StoreValue(rl_dest, rl_result); + } +} + +/* + * The following are the first-level codegen routines that analyze the format + * of each bytecode then either dispatch special purpose codegen routines + * or produce corresponding Thumb instructions directly. + */ + +static bool IsPowerOfTwo(int x) +{ + return (x & (x - 1)) == 0; +} + +// Returns true if no more than two bits are set in 'x'. +static bool IsPopCountLE2(unsigned int x) +{ + x &= x - 1; + return (x & (x - 1)) == 0; +} + +// Returns the index of the lowest set bit in 'x'. +static int LowestSetBit(unsigned int x) { + int bit_posn = 0; + while ((x & 0xf) == 0) { + bit_posn += 4; + x >>= 4; + } + while ((x & 1) == 0) { + bit_posn++; + x >>= 1; + } + return bit_posn; +} + +// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit' +// and store the result in 'rl_dest'. +bool Mir2Lir::HandleEasyDivide(Instruction::Code dalvik_opcode, + RegLocation rl_src, RegLocation rl_dest, int lit) +{ + if ((lit < 2) || ((cu_->instruction_set != kThumb2) && !IsPowerOfTwo(lit))) { + return false; + } + // No divide instruction for Arm, so check for more special cases + if ((cu_->instruction_set == kThumb2) && !IsPowerOfTwo(lit)) { + return SmallLiteralDivide(dalvik_opcode, rl_src, rl_dest, lit); + } + int k = LowestSetBit(lit); + if (k >= 30) { + // Avoid special cases. 
+ return false; + } + bool div = (dalvik_opcode == Instruction::DIV_INT_LIT8 || + dalvik_opcode == Instruction::DIV_INT_LIT16); + rl_src = LoadValue(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (div) { + int t_reg = AllocTemp(); + if (lit == 2) { + // Division by 2 is by far the most common division by constant. + OpRegRegImm(kOpLsr, t_reg, rl_src.low_reg, 32 - k); + OpRegRegReg(kOpAdd, t_reg, t_reg, rl_src.low_reg); + OpRegRegImm(kOpAsr, rl_result.low_reg, t_reg, k); + } else { + OpRegRegImm(kOpAsr, t_reg, rl_src.low_reg, 31); + OpRegRegImm(kOpLsr, t_reg, t_reg, 32 - k); + OpRegRegReg(kOpAdd, t_reg, t_reg, rl_src.low_reg); + OpRegRegImm(kOpAsr, rl_result.low_reg, t_reg, k); + } + } else { + int t_reg1 = AllocTemp(); + int t_reg2 = AllocTemp(); + if (lit == 2) { + OpRegRegImm(kOpLsr, t_reg1, rl_src.low_reg, 32 - k); + OpRegRegReg(kOpAdd, t_reg2, t_reg1, rl_src.low_reg); + OpRegRegImm(kOpAnd, t_reg2, t_reg2, lit -1); + OpRegRegReg(kOpSub, rl_result.low_reg, t_reg2, t_reg1); + } else { + OpRegRegImm(kOpAsr, t_reg1, rl_src.low_reg, 31); + OpRegRegImm(kOpLsr, t_reg1, t_reg1, 32 - k); + OpRegRegReg(kOpAdd, t_reg2, t_reg1, rl_src.low_reg); + OpRegRegImm(kOpAnd, t_reg2, t_reg2, lit - 1); + OpRegRegReg(kOpSub, rl_result.low_reg, t_reg2, t_reg1); + } + } + StoreValue(rl_dest, rl_result); + return true; +} + +// Returns true if it added instructions to 'cu' to multiply 'rl_src' by 'lit' +// and store the result in 'rl_dest'. +bool Mir2Lir::HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) +{ + // Can we simplify this multiplication? + bool power_of_two = false; + bool pop_count_le2 = false; + bool power_of_two_minus_one = false; + if (lit < 2) { + // Avoid special cases. + return false; + } else if (IsPowerOfTwo(lit)) { + power_of_two = true; + } else if (IsPopCountLE2(lit)) { + pop_count_le2 = true; + } else if (IsPowerOfTwo(lit + 1)) { + power_of_two_minus_one = true; + } else { + return false; + } + rl_src = LoadValue(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (power_of_two) { + // Shift. + OpRegRegImm(kOpLsl, rl_result.low_reg, rl_src.low_reg, LowestSetBit(lit)); + } else if (pop_count_le2) { + // Shift and add and shift. + int first_bit = LowestSetBit(lit); + int second_bit = LowestSetBit(lit ^ (1 << first_bit)); + GenMultiplyByTwoBitMultiplier(rl_src, rl_result, lit, first_bit, second_bit); + } else { + // Reverse subtract: (src << (shift + 1)) - src. 
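+ // For illustration: with lit == 7, LowestSetBit(lit + 1) == 3, so the two
+ // instructions below compute (src << 3) - src == 8*src - src == 7*src.
+ // In general, for lit == 2^n - 1 this is x * lit == (x << n) - x.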
+ DCHECK(power_of_two_minus_one); + // TUNING: rsb dst, src, src lsl#LowestSetBit(lit + 1) + int t_reg = AllocTemp(); + OpRegRegImm(kOpLsl, t_reg, rl_src.low_reg, LowestSetBit(lit + 1)); + OpRegRegReg(kOpSub, rl_result.low_reg, t_reg, rl_src.low_reg); + } + StoreValue(rl_dest, rl_result); + return true; +} + +void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, + int lit) +{ + RegLocation rl_result; + OpKind op = static_cast<OpKind>(0); /* Make gcc happy */ + int shift_op = false; + bool is_div = false; + + switch (opcode) { + case Instruction::RSUB_INT_LIT8: + case Instruction::RSUB_INT: { + rl_src = LoadValue(rl_src, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (cu_->instruction_set == kThumb2) { + OpRegRegImm(kOpRsub, rl_result.low_reg, rl_src.low_reg, lit); + } else { + OpRegReg(kOpNeg, rl_result.low_reg, rl_src.low_reg); + OpRegImm(kOpAdd, rl_result.low_reg, lit); + } + StoreValue(rl_dest, rl_result); + return; + } + + case Instruction::SUB_INT: + case Instruction::SUB_INT_2ADDR: + lit = -lit; + // Intended fallthrough + case Instruction::ADD_INT: + case Instruction::ADD_INT_2ADDR: + case Instruction::ADD_INT_LIT8: + case Instruction::ADD_INT_LIT16: + op = kOpAdd; + break; + case Instruction::MUL_INT: + case Instruction::MUL_INT_2ADDR: + case Instruction::MUL_INT_LIT8: + case Instruction::MUL_INT_LIT16: { + if (HandleEasyMultiply(rl_src, rl_dest, lit)) { + return; + } + op = kOpMul; + break; + } + case Instruction::AND_INT: + case Instruction::AND_INT_2ADDR: + case Instruction::AND_INT_LIT8: + case Instruction::AND_INT_LIT16: + op = kOpAnd; + break; + case Instruction::OR_INT: + case Instruction::OR_INT_2ADDR: + case Instruction::OR_INT_LIT8: + case Instruction::OR_INT_LIT16: + op = kOpOr; + break; + case Instruction::XOR_INT: + case Instruction::XOR_INT_2ADDR: + case Instruction::XOR_INT_LIT8: + case Instruction::XOR_INT_LIT16: + op = kOpXor; + break; + case Instruction::SHL_INT_LIT8: + case Instruction::SHL_INT: + case Instruction::SHL_INT_2ADDR: + lit &= 31; + shift_op = true; + op = kOpLsl; + break; + case Instruction::SHR_INT_LIT8: + case Instruction::SHR_INT: + case Instruction::SHR_INT_2ADDR: + lit &= 31; + shift_op = true; + op = kOpAsr; + break; + case Instruction::USHR_INT_LIT8: + case Instruction::USHR_INT: + case Instruction::USHR_INT_2ADDR: + lit &= 31; + shift_op = true; + op = kOpLsr; + break; + + case Instruction::DIV_INT: + case Instruction::DIV_INT_2ADDR: + case Instruction::DIV_INT_LIT8: + case Instruction::DIV_INT_LIT16: + case Instruction::REM_INT: + case Instruction::REM_INT_2ADDR: + case Instruction::REM_INT_LIT8: + case Instruction::REM_INT_LIT16: { + if (lit == 0) { + GenImmedCheck(kCondAl, 0, 0, kThrowDivZero); + return; + } + if (HandleEasyDivide(opcode, rl_src, rl_dest, lit)) { + return; + } + if ((opcode == Instruction::DIV_INT_LIT8) || + (opcode == Instruction::DIV_INT) || + (opcode == Instruction::DIV_INT_2ADDR) || + (opcode == Instruction::DIV_INT_LIT16)) { + is_div = true; + } else { + is_div = false; + } + if (cu_->instruction_set == kMips) { + rl_src = LoadValue(rl_src, kCoreReg); + rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div); + } else { + FlushAllRegs(); /* Everything to home location */ + LoadValueDirectFixed(rl_src, TargetReg(kArg0)); + Clobber(TargetReg(kArg0)); + int func_offset = ENTRYPOINT_OFFSET(pIdivmod); + CallRuntimeHelperRegImm(func_offset, TargetReg(kArg0), lit, false); + if (is_div) + rl_result = GetReturn(false); + else + rl_result = GetReturnAlt(); + } 
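+ // The idivmod helper leaves the quotient in the normal return register and
+ // the remainder in the alternate one (kArg1 on ARM, per the NOTE on REM_INT
+ // above), hence the GetReturn()/GetReturnAlt() split.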
+ StoreValue(rl_dest, rl_result); + return; + } + default: + LOG(FATAL) << "Unexpected opcode " << opcode; + } + rl_src = LoadValue(rl_src, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + // Avoid shifts by literal 0 - no support in Thumb. Change to copy + if (shift_op && (lit == 0)) { + OpRegCopy(rl_result.low_reg, rl_src.low_reg); + } else { + OpRegRegImm(op, rl_result.low_reg, rl_src.low_reg, lit); + } + StoreValue(rl_dest, rl_result); +} + +void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) +{ + RegLocation rl_result; + OpKind first_op = kOpBkpt; + OpKind second_op = kOpBkpt; + bool call_out = false; + bool check_zero = false; + int func_offset; + int ret_reg = TargetReg(kRet0); + + switch (opcode) { + case Instruction::NOT_LONG: + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + // Check for destructive overlap + if (rl_result.low_reg == rl_src2.high_reg) { + int t_reg = AllocTemp(); + OpRegCopy(t_reg, rl_src2.high_reg); + OpRegReg(kOpMvn, rl_result.low_reg, rl_src2.low_reg); + OpRegReg(kOpMvn, rl_result.high_reg, t_reg); + FreeTemp(t_reg); + } else { + OpRegReg(kOpMvn, rl_result.low_reg, rl_src2.low_reg); + OpRegReg(kOpMvn, rl_result.high_reg, rl_src2.high_reg); + } + StoreValueWide(rl_dest, rl_result); + return; + case Instruction::ADD_LONG: + case Instruction::ADD_LONG_2ADDR: + if (cu_->instruction_set != kThumb2) { + GenAddLong(rl_dest, rl_src1, rl_src2); + return; + } + first_op = kOpAdd; + second_op = kOpAdc; + break; + case Instruction::SUB_LONG: + case Instruction::SUB_LONG_2ADDR: + if (cu_->instruction_set != kThumb2) { + GenSubLong(rl_dest, rl_src1, rl_src2); + return; + } + first_op = kOpSub; + second_op = kOpSbc; + break; + case Instruction::MUL_LONG: + case Instruction::MUL_LONG_2ADDR: + if (cu_->instruction_set == kThumb2) { + GenMulLong(rl_dest, rl_src1, rl_src2); + return; + } else { + call_out = true; + ret_reg = TargetReg(kRet0); + func_offset = ENTRYPOINT_OFFSET(pLmul); + } + break; + case Instruction::DIV_LONG: + case Instruction::DIV_LONG_2ADDR: + call_out = true; + check_zero = true; + ret_reg = TargetReg(kRet0); + func_offset = ENTRYPOINT_OFFSET(pLdiv); + break; + case Instruction::REM_LONG: + case Instruction::REM_LONG_2ADDR: + call_out = true; + check_zero = true; + func_offset = ENTRYPOINT_OFFSET(pLdivmod); + /* NOTE - for Arm, result is in kArg2/kArg3 instead of kRet0/kRet1 */ + ret_reg = (cu_->instruction_set == kThumb2) ? 
TargetReg(kArg2) : TargetReg(kRet0); + break; + case Instruction::AND_LONG_2ADDR: + case Instruction::AND_LONG: + if (cu_->instruction_set == kX86) { + return GenAndLong(rl_dest, rl_src1, rl_src2); + } + first_op = kOpAnd; + second_op = kOpAnd; + break; + case Instruction::OR_LONG: + case Instruction::OR_LONG_2ADDR: + if (cu_->instruction_set == kX86) { + GenOrLong(rl_dest, rl_src1, rl_src2); + return; + } + first_op = kOpOr; + second_op = kOpOr; + break; + case Instruction::XOR_LONG: + case Instruction::XOR_LONG_2ADDR: + if (cu_->instruction_set == kX86) { + GenXorLong(rl_dest, rl_src1, rl_src2); + return; + } + first_op = kOpXor; + second_op = kOpXor; + break; + case Instruction::NEG_LONG: { + GenNegLong(rl_dest, rl_src2); + return; + } + default: + LOG(FATAL) << "Invalid long arith op"; + } + if (!call_out) { + GenLong3Addr(first_op, second_op, rl_dest, rl_src1, rl_src2); + } else { + FlushAllRegs(); /* Send everything to home location */ + if (check_zero) { + LoadValueDirectWideFixed(rl_src2, TargetReg(kArg2), TargetReg(kArg3)); + int r_tgt = CallHelperSetup(func_offset); + GenDivZeroCheck(TargetReg(kArg2), TargetReg(kArg3)); + LoadValueDirectWideFixed(rl_src1, TargetReg(kArg0), TargetReg(kArg1)); + // NOTE: callout here is not a safepoint + CallHelper(r_tgt, func_offset, false /* not safepoint */); + } else { + CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false); + } + // Adjust return regs in to handle case of rem returning kArg2/kArg3 + if (ret_reg == TargetReg(kRet0)) + rl_result = GetReturnWide(false); + else + rl_result = GetReturnWideAlt(); + StoreValueWide(rl_dest, rl_result); + } +} + +void Mir2Lir::GenConversionCall(int func_offset, + RegLocation rl_dest, RegLocation rl_src) +{ + /* + * Don't optimize the register usage since it calls out to support + * functions + */ + FlushAllRegs(); /* Send everything to home location */ + if (rl_src.wide) { + LoadValueDirectWideFixed(rl_src, rl_src.fp ? TargetReg(kFArg0) : TargetReg(kArg0), + rl_src.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); + } else { + LoadValueDirectFixed(rl_src, rl_src.fp ? 
TargetReg(kFArg0) : TargetReg(kArg0)); + } + CallRuntimeHelperRegLocation(func_offset, rl_src, false); + if (rl_dest.wide) { + RegLocation rl_result; + rl_result = GetReturnWide(rl_dest.fp); + StoreValueWide(rl_dest, rl_result); + } else { + RegLocation rl_result; + rl_result = GetReturn(rl_dest.fp); + StoreValue(rl_dest, rl_result); + } +} + +/* Check if we need to check for pending suspend request */ +void Mir2Lir::GenSuspendTest(int opt_flags) +{ + if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) { + return; + } + FlushAllRegs(); + LIR* branch = OpTestSuspend(NULL); + LIR* ret_lab = NewLIR0(kPseudoTargetLabel); + LIR* target = RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, + reinterpret_cast<uintptr_t>(ret_lab), current_dalvik_offset_); + branch->target = target; + suspend_launchpads_.Insert(target); +} + +/* Check if we need to check for pending suspend request */ +void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) +{ + if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) { + OpUnconditionalBranch(target); + return; + } + OpTestSuspend(target); + LIR* launch_pad = + RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, + reinterpret_cast<uintptr_t>(target), current_dalvik_offset_); + FlushAllRegs(); + OpUnconditionalBranch(launch_pad); + suspend_launchpads_.Insert(launch_pad); +} + +} // namespace art diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc new file mode 100644 index 0000000000..e3993e0617 --- /dev/null +++ b/compiler/dex/quick/gen_invoke.cc @@ -0,0 +1,1476 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex/compiler_ir.h" +#include "dex_file-inl.h" +#include "invoke_type.h" +#include "mirror/array.h" +#include "mirror/string.h" +#include "mir_to_lir-inl.h" +#include "oat/runtime/oat_support_entrypoints.h" +#include "x86/codegen_x86.h" + +namespace art { + +/* + * This source files contains "gen" codegen routines that should + * be applicable to most targets. Only mid-level support utilities + * and "op" calls may be used here. + */ + +/* + * To save scheduling time, helper calls are broken into two parts: generation of + * the helper target address, and the actuall call to the helper. Because x86 + * has a memory call operation, part 1 is a NOP for x86. For other targets, + * load arguments between the two parts. + */ +int Mir2Lir::CallHelperSetup(int helper_offset) +{ + return (cu_->instruction_set == kX86) ? 
0 : LoadHelper(helper_offset); +} + +/* NOTE: if r_tgt is a temp, it will be freed following use */ +LIR* Mir2Lir::CallHelper(int r_tgt, int helper_offset, bool safepoint_pc) +{ + LIR* call_inst; + if (cu_->instruction_set == kX86) { + call_inst = OpThreadMem(kOpBlx, helper_offset); + } else { + call_inst = OpReg(kOpBlx, r_tgt); + FreeTemp(r_tgt); + } + if (safepoint_pc) { + MarkSafepointPC(call_inst); + } + return call_inst; +} + +void Mir2Lir::CallRuntimeHelperImm(int helper_offset, int arg0, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + LoadConstant(TargetReg(kArg0), arg0); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperReg(int helper_offset, int arg0, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + OpRegCopy(TargetReg(kArg0), arg0); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperRegLocation(int helper_offset, RegLocation arg0, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + if (arg0.wide == 0) { + LoadValueDirectFixed(arg0, TargetReg(kArg0)); + } else { + LoadValueDirectWideFixed(arg0, TargetReg(kArg0), TargetReg(kArg1)); + } + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperImmImm(int helper_offset, int arg0, int arg1, + bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + LoadConstant(TargetReg(kArg0), arg0); + LoadConstant(TargetReg(kArg1), arg1); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperImmRegLocation(int helper_offset, int arg0, + RegLocation arg1, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + if (arg1.wide == 0) { + LoadValueDirectFixed(arg1, TargetReg(kArg1)); + } else { + LoadValueDirectWideFixed(arg1, TargetReg(kArg1), TargetReg(kArg2)); + } + LoadConstant(TargetReg(kArg0), arg0); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperRegLocationImm(int helper_offset, RegLocation arg0, int arg1, + bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + LoadValueDirectFixed(arg0, TargetReg(kArg0)); + LoadConstant(TargetReg(kArg1), arg1); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperImmReg(int helper_offset, int arg0, int arg1, + bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + OpRegCopy(TargetReg(kArg1), arg1); + LoadConstant(TargetReg(kArg0), arg0); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperRegImm(int helper_offset, int arg0, int arg1, + bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + OpRegCopy(TargetReg(kArg0), arg0); + LoadConstant(TargetReg(kArg1), arg1); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperImmMethod(int helper_offset, int arg0, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + LoadCurrMethodDirect(TargetReg(kArg1)); + LoadConstant(TargetReg(kArg0), arg0); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(int helper_offset, RegLocation arg0, + RegLocation arg1, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + if (arg0.wide == 0) { + LoadValueDirectFixed(arg0, arg0.fp ? 
TargetReg(kFArg0) : TargetReg(kArg0)); + if (arg1.wide == 0) { + if (cu_->instruction_set == kMips) { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1)); + } else { + LoadValueDirectFixed(arg1, TargetReg(kArg1)); + } + } else { + if (cu_->instruction_set == kMips) { + LoadValueDirectWideFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1), arg1.fp ? TargetReg(kFArg3) : TargetReg(kArg2)); + } else { + LoadValueDirectWideFixed(arg1, TargetReg(kArg1), TargetReg(kArg2)); + } + } + } else { + LoadValueDirectWideFixed(arg0, arg0.fp ? TargetReg(kFArg0) : TargetReg(kArg0), arg0.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); + if (arg1.wide == 0) { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2)); + } else { + LoadValueDirectWideFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2), arg1.fp ? TargetReg(kFArg3) : TargetReg(kArg3)); + } + } + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperRegReg(int helper_offset, int arg0, int arg1, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + DCHECK_NE(TargetReg(kArg0), arg1); // check copy into arg0 won't clobber arg1 + OpRegCopy(TargetReg(kArg0), arg0); + OpRegCopy(TargetReg(kArg1), arg1); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperRegRegImm(int helper_offset, int arg0, int arg1, + int arg2, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + DCHECK_NE(TargetReg(kArg0), arg1); // check copy into arg0 won't clobber arg1 + OpRegCopy(TargetReg(kArg0), arg0); + OpRegCopy(TargetReg(kArg1), arg1); + LoadConstant(TargetReg(kArg2), arg2); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperImmMethodRegLocation(int helper_offset, + int arg0, RegLocation arg2, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + LoadValueDirectFixed(arg2, TargetReg(kArg2)); + LoadCurrMethodDirect(TargetReg(kArg1)); + LoadConstant(TargetReg(kArg0), arg0); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperImmMethodImm(int helper_offset, int arg0, + int arg2, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + LoadCurrMethodDirect(TargetReg(kArg1)); + LoadConstant(TargetReg(kArg2), arg2); + LoadConstant(TargetReg(kArg0), arg0); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(int helper_offset, + int arg0, RegLocation arg1, + RegLocation arg2, bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + LoadValueDirectFixed(arg1, TargetReg(kArg1)); + if (arg2.wide == 0) { + LoadValueDirectFixed(arg2, TargetReg(kArg2)); + } else { + LoadValueDirectWideFixed(arg2, TargetReg(kArg2), TargetReg(kArg3)); + } + LoadConstant(TargetReg(kArg0), arg0); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + +/* + * If there are any ins passed in registers that have not been promoted + * to a callee-save register, flush them to the frame. Perform intial + * assignment of promoted arguments. + * + * ArgLocs is an array of location records describing the incoming arguments + * with one location record per word of argument. 
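+ *
+ * The first three argument words arrive in kArg1..kArg3 (see arg_regs below);
+ * the remainder arrive on the stack and are only touched here if promoted.
+ * A long/double argument occupies two consecutive records, with the second
+ * flagged as the high word.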
+ */ +void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) +{ + /* + * Dummy up a RegLocation for the incoming Method* + * It will attempt to keep kArg0 live (or copy it to home location + * if promoted). + */ + RegLocation rl_src = rl_method; + rl_src.location = kLocPhysReg; + rl_src.low_reg = TargetReg(kArg0); + rl_src.home = false; + MarkLive(rl_src.low_reg, rl_src.s_reg_low); + StoreValue(rl_method, rl_src); + // If Method* has been promoted, explicitly flush + if (rl_method.location == kLocPhysReg) { + StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + } + + if (cu_->num_ins == 0) + return; + const int num_arg_regs = 3; + static SpecialTargetRegister arg_regs[] = {kArg1, kArg2, kArg3}; + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + /* + * Copy incoming arguments to their proper home locations. + * NOTE: an older version of dx had an issue in which + * it would reuse static method argument registers. + * This could result in the same Dalvik virtual register + * being promoted to both core and fp regs. To account for this, + * we only copy to the corresponding promoted physical register + * if it matches the type of the SSA name for the incoming + * argument. It is also possible that long and double arguments + * end up half-promoted. In those cases, we must flush the promoted + * half to memory as well. + */ + for (int i = 0; i < cu_->num_ins; i++) { + PromotionMap* v_map = &promotion_map_[start_vreg + i]; + if (i < num_arg_regs) { + // If arriving in register + bool need_flush = true; + RegLocation* t_loc = &ArgLocs[i]; + if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { + OpRegCopy(v_map->core_reg, TargetReg(arg_regs[i])); + need_flush = false; + } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { + OpRegCopy(v_map->FpReg, TargetReg(arg_regs[i])); + need_flush = false; + } else { + need_flush = true; + } + + // For wide args, force flush if only half is promoted + if (t_loc->wide) { + PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1); + need_flush |= (p_map->core_location != v_map->core_location) || + (p_map->fp_location != v_map->fp_location); + } + if (need_flush) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), + TargetReg(arg_regs[i]), kWord); + } + } else { + // If arriving in frame & promoted + if (v_map->core_location == kLocPhysReg) { + LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), + v_map->core_reg); + } + if (v_map->fp_location == kLocPhysReg) { + LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), + v_map->FpReg); + } + } + } +} + +/* + * Bit of a hack here - in the absence of a real scheduling pass, + * emit the next instruction in static & direct invoke sequences. 
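+ *
+ * Each call emits the next stage of the sequence and returns state + 1; a
+ * return of -1 signals that the sequence is complete. LoadArgRegs() and the
+ * GenDalvikArgs* routines below interleave these stages with the argument
+ * loads.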
+ */ +static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t unused, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) +{ + Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + if (cu->instruction_set != kThumb2) { + // Disable sharpening + direct_code = 0; + direct_method = 0; + } + if (direct_code != 0 && direct_method != 0) { + switch (state) { + case 0: // Get the current Method* [sets kArg0] + if (direct_code != static_cast<unsigned int>(-1)) { + cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code); + } else { + CHECK_EQ(cu->dex_file, target_method.dex_file); + LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_, + target_method.dex_method_index, 0); + if (data_target == NULL) { + data_target = cg->AddWordData(&cg->code_literal_list_, target_method.dex_method_index); + data_target->operands[1] = type; + } + LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kInvokeTgt), data_target); + cg->AppendLIR(load_pc_rel); + DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target); + } + if (direct_method != static_cast<unsigned int>(-1)) { + cg->LoadConstant(cg->TargetReg(kArg0), direct_method); + } else { + CHECK_EQ(cu->dex_file, target_method.dex_file); + LIR* data_target = cg->ScanLiteralPool(cg->method_literal_list_, + target_method.dex_method_index, 0); + if (data_target == NULL) { + data_target = cg->AddWordData(&cg->method_literal_list_, target_method.dex_method_index); + data_target->operands[1] = type; + } + LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kArg0), data_target); + cg->AppendLIR(load_pc_rel); + DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target); + } + break; + default: + return -1; + } + } else { + switch (state) { + case 0: // Get the current Method* [sets kArg0] + // TUNING: we can save a reg copy if Method* has been promoted. + cg->LoadCurrMethodDirect(cg->TargetReg(kArg0)); + break; + case 1: // Get method->dex_cache_resolved_methods_ + cg->LoadWordDisp(cg->TargetReg(kArg0), + mirror::AbstractMethod::DexCacheResolvedMethodsOffset().Int32Value(), cg->TargetReg(kArg0)); + // Set up direct code if known. 
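+ // Here direct_code == 0 means the code address is not known (sharpening
+ // disabled or unresolved); -1 means it is only known later, so a
+ // literal-pool word keyed by the method index is emitted instead
+ // (presumably patched once the address is available); any other value is
+ // an address that can be loaded as an immediate.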
+ if (direct_code != 0) { + if (direct_code != static_cast<unsigned int>(-1)) { + cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code); + } else { + CHECK_EQ(cu->dex_file, target_method.dex_file); + LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_, + target_method.dex_method_index, 0); + if (data_target == NULL) { + data_target = cg->AddWordData(&cg->code_literal_list_, target_method.dex_method_index); + data_target->operands[1] = type; + } + LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kInvokeTgt), data_target); + cg->AppendLIR(load_pc_rel); + DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target); + } + } + break; + case 2: // Grab target method* + CHECK_EQ(cu->dex_file, target_method.dex_file); + cg->LoadWordDisp(cg->TargetReg(kArg0), + mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + + (target_method.dex_method_index * 4), + cg-> TargetReg(kArg0)); + break; + case 3: // Grab the code from the method* + if (cu->instruction_set != kX86) { + if (direct_code == 0) { + cg->LoadWordDisp(cg->TargetReg(kArg0), + mirror::AbstractMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); + } + break; + } + // Intentional fallthrough for x86 + default: + return -1; + } + } + return state + 1; +} + +/* + * Bit of a hack here - in the absence of a real scheduling pass, + * emit the next instruction in a virtual invoke sequence. + * We can use kLr as a temp prior to target address loading + * Note also that we'll load the first argument ("this") into + * kArg1 here rather than the standard LoadArgRegs. + */ +static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t method_idx, uintptr_t unused, uintptr_t unused2, + InvokeType unused3) +{ + Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + /* + * This is the fast path in which the target virtual method is + * fully resolved at compile time. + */ + switch (state) { + case 0: { // Get "this" [set kArg1] + RegLocation rl_arg = info->args[0]; + cg->LoadValueDirectFixed(rl_arg, cg->TargetReg(kArg1)); + break; + } + case 1: // Is "this" null? [use kArg1] + cg->GenNullCheck(info->args[0].s_reg_low, cg->TargetReg(kArg1), info->opt_flags); + // get this->klass_ [use kArg1, set kInvokeTgt] + cg->LoadWordDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); + break; + case 2: // Get this->klass_->vtable [usr kInvokeTgt, set kInvokeTgt] + cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), mirror::Class::VTableOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); + break; + case 3: // Get target method [use kInvokeTgt, set kArg0] + cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), (method_idx * 4) + + mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(), + cg->TargetReg(kArg0)); + break; + case 4: // Get the compiled code address [uses kArg0, sets kInvokeTgt] + if (cu->instruction_set != kX86) { + cg->LoadWordDisp(cg->TargetReg(kArg0), + mirror::AbstractMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); + break; + } + // Intentional fallthrough for X86 + default: + return -1; + } + return state + 1; +} + +/* + * All invoke-interface calls bounce off of art_quick_invoke_interface_trampoline, + * which will locate the target and continue on via a tail call. 
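+ *
+ * On ARM and MIPS the trampoline address is read out of thread-local storage
+ * via kSelf into kInvokeTgt; the x86 path skips that load (the trampoline
+ * offset is folded to 0 below), since x86 can reach helpers through thread
+ * memory directly (cf. OpThreadMem in CallHelper).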
+ */ +static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state, + const MethodReference& target_method, + uint32_t unused, uintptr_t unused2, + uintptr_t direct_method, InvokeType unused4) +{ + Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + if (cu->instruction_set != kThumb2) { + // Disable sharpening + direct_method = 0; + } + int trampoline = (cu->instruction_set == kX86) ? 0 + : ENTRYPOINT_OFFSET(pInvokeInterfaceTrampoline); + + if (direct_method != 0) { + switch (state) { + case 0: // Load the trampoline target [sets kInvokeTgt]. + if (cu->instruction_set != kX86) { + cg->LoadWordDisp(cg->TargetReg(kSelf), trampoline, cg->TargetReg(kInvokeTgt)); + } + // Get the interface Method* [sets kArg0] + if (direct_method != static_cast<unsigned int>(-1)) { + cg->LoadConstant(cg->TargetReg(kArg0), direct_method); + } else { + CHECK_EQ(cu->dex_file, target_method.dex_file); + LIR* data_target = cg->ScanLiteralPool(cg->method_literal_list_, + target_method.dex_method_index, 0); + if (data_target == NULL) { + data_target = cg->AddWordData(&cg->method_literal_list_, + target_method.dex_method_index); + data_target->operands[1] = kInterface; + } + LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kArg0), data_target); + cg->AppendLIR(load_pc_rel); + DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target); + } + break; + default: + return -1; + } + } else { + switch (state) { + case 0: + // Get the current Method* [sets kArg0] - TUNING: remove copy of method if it is promoted. + cg->LoadCurrMethodDirect(cg->TargetReg(kArg0)); + // Load the trampoline target [sets kInvokeTgt]. + if (cu->instruction_set != kX86) { + cg->LoadWordDisp(cg->TargetReg(kSelf), trampoline, cg->TargetReg(kInvokeTgt)); + } + break; + case 1: // Get method->dex_cache_resolved_methods_ [set/use kArg0] + cg->LoadWordDisp(cg->TargetReg(kArg0), + mirror::AbstractMethod::DexCacheResolvedMethodsOffset().Int32Value(), + cg->TargetReg(kArg0)); + break; + case 2: // Grab target method* [set/use kArg0] + CHECK_EQ(cu->dex_file, target_method.dex_file); + cg->LoadWordDisp(cg->TargetReg(kArg0), + mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + + (target_method.dex_method_index * 4), + cg->TargetReg(kArg0)); + break; + default: + return -1; + } + } + return state + 1; +} + +static int NextInvokeInsnSP(CompilationUnit* cu, CallInfo* info, int trampoline, + int state, const MethodReference& target_method, + uint32_t method_idx) +{ + Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + /* + * This handles the case in which the base method is not fully + * resolved at compile time, we bail to a runtime helper. 
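+ * Only kArg0 needs to be materialized here: it carries the method index,
+ * and the trampoline (loaded into kInvokeTgt on non-x86 targets) resolves
+ * the callee itself at runtime.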
+ */ + if (state == 0) { + if (cu->instruction_set != kX86) { + // Load trampoline target + cg->LoadWordDisp(cg->TargetReg(kSelf), trampoline, cg->TargetReg(kInvokeTgt)); + } + // Load kArg0 with method index + CHECK_EQ(cu->dex_file, target_method.dex_file); + cg->LoadConstant(cg->TargetReg(kArg0), target_method.dex_method_index); + return 1; + } + return -1; +} + +static int NextStaticCallInsnSP(CompilationUnit* cu, CallInfo* info, + int state, + const MethodReference& target_method, + uint32_t method_idx, + uintptr_t unused, uintptr_t unused2, + InvokeType unused3) +{ + int trampoline = ENTRYPOINT_OFFSET(pInvokeStaticTrampolineWithAccessCheck); + return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0); +} + +static int NextDirectCallInsnSP(CompilationUnit* cu, CallInfo* info, int state, + const MethodReference& target_method, + uint32_t method_idx, uintptr_t unused, + uintptr_t unused2, InvokeType unused3) +{ + int trampoline = ENTRYPOINT_OFFSET(pInvokeDirectTrampolineWithAccessCheck); + return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0); +} + +static int NextSuperCallInsnSP(CompilationUnit* cu, CallInfo* info, int state, + const MethodReference& target_method, + uint32_t method_idx, uintptr_t unused, + uintptr_t unused2, InvokeType unused3) +{ + int trampoline = ENTRYPOINT_OFFSET(pInvokeSuperTrampolineWithAccessCheck); + return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0); +} + +static int NextVCallInsnSP(CompilationUnit* cu, CallInfo* info, int state, + const MethodReference& target_method, + uint32_t method_idx, uintptr_t unused, + uintptr_t unused2, InvokeType unused3) +{ + int trampoline = ENTRYPOINT_OFFSET(pInvokeVirtualTrampolineWithAccessCheck); + return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0); +} + +static int NextInterfaceCallInsnWithAccessCheck(CompilationUnit* cu, + CallInfo* info, int state, + const MethodReference& target_method, + uint32_t unused, + uintptr_t unused2, uintptr_t unused3, + InvokeType unused4) +{ + int trampoline = ENTRYPOINT_OFFSET(pInvokeInterfaceTrampolineWithAccessCheck); + return NextInvokeInsnSP(cu, info, trampoline, state, target_method, 0); +} + +int Mir2Lir::LoadArgRegs(CallInfo* info, int call_state, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type, bool skip_this) +{ + int last_arg_reg = TargetReg(kArg3); + int next_reg = TargetReg(kArg1); + int next_arg = 0; + if (skip_this) { + next_reg++; + next_arg++; + } + for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) { + RegLocation rl_arg = info->args[next_arg++]; + rl_arg = UpdateRawLoc(rl_arg); + if (rl_arg.wide && (next_reg <= TargetReg(kArg2))) { + LoadValueDirectWideFixed(rl_arg, next_reg, next_reg + 1); + next_reg++; + next_arg++; + } else { + if (rl_arg.wide) { + rl_arg.wide = false; + rl_arg.is_const = false; + } + LoadValueDirectFixed(rl_arg, next_reg); + } + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + return call_state; +} + +/* + * Load up to 5 arguments, the first three of which will be in + * kArg1 .. kArg3. On entry kArg0 contains the current method pointer, + * and as part of the load sequence, it must be replaced with + * the target method pointer. Note, this may also be called + * for "range" variants if the number of arguments is 5 or fewer. 
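+ *
+ * One wrinkle handled below: a wide value whose low word is argument word 2
+ * is split between kArg3 and the out area, so its high word is stored to its
+ * out slot on the stack before the register loads happen.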
+ */ +int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, + int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type, bool skip_this) +{ + RegLocation rl_arg; + + /* If no arguments, just return */ + if (info->num_arg_words == 0) + return call_state; + + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + + DCHECK_LE(info->num_arg_words, 5); + if (info->num_arg_words > 3) { + int32_t next_use = 3; + //Detect special case of wide arg spanning arg3/arg4 + RegLocation rl_use0 = info->args[0]; + RegLocation rl_use1 = info->args[1]; + RegLocation rl_use2 = info->args[2]; + if (((!rl_use0.wide && !rl_use1.wide) || rl_use0.wide) && + rl_use2.wide) { + int reg = -1; + // Wide spans, we need the 2nd half of uses[2]. + rl_arg = UpdateLocWide(rl_use2); + if (rl_arg.location == kLocPhysReg) { + reg = rl_arg.high_reg; + } else { + // kArg2 & rArg3 can safely be used here + reg = TargetReg(kArg3); + LoadWordDisp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg); + call_state = next_call_insn(cu_, info, call_state, target_method, + vtable_idx, direct_code, direct_method, type); + } + StoreBaseDisp(TargetReg(kSp), (next_use + 1) * 4, reg, kWord); + StoreBaseDisp(TargetReg(kSp), 16 /* (3+1)*4 */, reg, kWord); + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + next_use++; + } + // Loop through the rest + while (next_use < info->num_arg_words) { + int low_reg; + int high_reg = -1; + rl_arg = info->args[next_use]; + rl_arg = UpdateRawLoc(rl_arg); + if (rl_arg.location == kLocPhysReg) { + low_reg = rl_arg.low_reg; + high_reg = rl_arg.high_reg; + } else { + low_reg = TargetReg(kArg2); + if (rl_arg.wide) { + high_reg = TargetReg(kArg3); + LoadValueDirectWideFixed(rl_arg, low_reg, high_reg); + } else { + LoadValueDirectFixed(rl_arg, low_reg); + } + call_state = next_call_insn(cu_, info, call_state, target_method, + vtable_idx, direct_code, direct_method, type); + } + int outs_offset = (next_use + 1) * 4; + if (rl_arg.wide) { + StoreBaseDispWide(TargetReg(kSp), outs_offset, low_reg, high_reg); + next_use += 2; + } else { + StoreWordDisp(TargetReg(kSp), outs_offset, low_reg); + next_use++; + } + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + } + + call_state = LoadArgRegs(info, call_state, next_call_insn, + target_method, vtable_idx, direct_code, direct_method, + type, skip_this); + + if (pcrLabel) { + *pcrLabel = GenNullCheck(info->args[0].s_reg_low, TargetReg(kArg1), info->opt_flags); + } + return call_state; +} + +/* + * May have 0+ arguments (also used for jumbo). Note that + * source virtual registers may be in physical registers, so may + * need to be flushed to home location before copying. This + * applies to arg3 and above (see below). 
+ * + * Two general strategies: + * If < 20 arguments + * Pass args 3-18 using vldm/vstm block copy + * Pass arg0, arg1 & arg2 in kArg1-kArg3 + * If 20+ arguments + * Pass args arg19+ using memcpy block copy + * Pass arg0, arg1 & arg2 in kArg1-kArg3 + * + */ +int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, + LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, + InvokeType type, bool skip_this) +{ + + // If we can treat it as non-range (Jumbo ops will use range form) + if (info->num_arg_words <= 5) + return GenDalvikArgsNoRange(info, call_state, pcrLabel, + next_call_insn, target_method, vtable_idx, + direct_code, direct_method, type, skip_this); + /* + * First load the non-register arguments. Both forms expect all + * of the source arguments to be in their home frame location, so + * scan the s_reg names and flush any that have been promoted to + * frame backing storage. + */ + // Scan the rest of the args - if in phys_reg flush to memory + for (int next_arg = 0; next_arg < info->num_arg_words;) { + RegLocation loc = info->args[next_arg]; + if (loc.wide) { + loc = UpdateLocWide(loc); + if ((next_arg >= 2) && (loc.location == kLocPhysReg)) { + StoreBaseDispWide(TargetReg(kSp), SRegOffset(loc.s_reg_low), + loc.low_reg, loc.high_reg); + } + next_arg += 2; + } else { + loc = UpdateLoc(loc); + if ((next_arg >= 3) && (loc.location == kLocPhysReg)) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), + loc.low_reg, kWord); + } + next_arg++; + } + } + + int start_offset = SRegOffset(info->args[3].s_reg_low); + int outs_offset = 4 /* Method* */ + (3 * 4); + if (cu_->instruction_set != kThumb2) { + // Generate memcpy + OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset); + OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset); + CallRuntimeHelperRegRegImm(ENTRYPOINT_OFFSET(pMemcpy), TargetReg(kArg0), + TargetReg(kArg1), (info->num_arg_words - 3) * 4, false); + } else { + if (info->num_arg_words >= 20) { + // Generate memcpy + OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset); + OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset); + CallRuntimeHelperRegRegImm(ENTRYPOINT_OFFSET(pMemcpy), TargetReg(kArg0), + TargetReg(kArg1), (info->num_arg_words - 3) * 4, false); + } else { + // Use vldm/vstm pair using kArg3 as a temp + int regs_left = std::min(info->num_arg_words - 3, 16); + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset); + LIR* ld = OpVldm(TargetReg(kArg3), regs_left); + //TUNING: loosen barrier + ld->def_mask = ENCODE_ALL; + SetMemRefType(ld, true /* is_load */, kDalvikReg); + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4)); + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + LIR* st = OpVstm(TargetReg(kArg3), regs_left); + SetMemRefType(st, false /* is_load */, kDalvikReg); + st->def_mask = ENCODE_ALL; + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + } + + call_state = LoadArgRegs(info, call_state, next_call_insn, + target_method, vtable_idx, direct_code, direct_method, + type, skip_this); 
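+ // A final state step lets the target finish materializing kArg0/kInvokeTgt,
+ // after which the receiver (already loaded into kArg1) is null checked if
+ // the caller requested it via pcrLabel.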
+ + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + if (pcrLabel) { + *pcrLabel = GenNullCheck(info->args[0].s_reg_low, TargetReg(kArg1), info->opt_flags); + } + return call_state; +} + +RegLocation Mir2Lir::InlineTarget(CallInfo* info) +{ + RegLocation res; + if (info->result.location == kLocInvalid) { + res = GetReturn(false); + } else { + res = info->result; + } + return res; +} + +RegLocation Mir2Lir::InlineTargetWide(CallInfo* info) +{ + RegLocation res; + if (info->result.location == kLocInvalid) { + res = GetReturnWide(false); + } else { + res = info->result; + } + return res; +} + +bool Mir2Lir::GenInlinedCharAt(CallInfo* info) +{ + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + // Location of reference to data array + int value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count + int count_offset = mirror::String::CountOffset().Int32Value(); + // Starting offset within data array + int offset_offset = mirror::String::OffsetOffset().Int32Value(); + // Start of char data with array_ + int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value(); + + RegLocation rl_obj = info->args[0]; + RegLocation rl_idx = info->args[1]; + rl_obj = LoadValue(rl_obj, kCoreReg); + rl_idx = LoadValue(rl_idx, kCoreReg); + int reg_max; + GenNullCheck(rl_obj.s_reg_low, rl_obj.low_reg, info->opt_flags); + bool range_check = (!(info->opt_flags & MIR_IGNORE_RANGE_CHECK)); + LIR* launch_pad = NULL; + int reg_off = INVALID_REG; + int reg_ptr = INVALID_REG; + if (cu_->instruction_set != kX86) { + reg_off = AllocTemp(); + reg_ptr = AllocTemp(); + if (range_check) { + reg_max = AllocTemp(); + LoadWordDisp(rl_obj.low_reg, count_offset, reg_max); + } + LoadWordDisp(rl_obj.low_reg, offset_offset, reg_off); + LoadWordDisp(rl_obj.low_reg, value_offset, reg_ptr); + if (range_check) { + // Set up a launch pad to allow retry in case of bounds violation */ + launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + intrinsic_launchpads_.Insert(launch_pad); + OpRegReg(kOpCmp, rl_idx.low_reg, reg_max); + FreeTemp(reg_max); + OpCondBranch(kCondCs, launch_pad); + } + } else { + if (range_check) { + reg_max = AllocTemp(); + LoadWordDisp(rl_obj.low_reg, count_offset, reg_max); + // Set up a launch pad to allow retry in case of bounds violation */ + launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + intrinsic_launchpads_.Insert(launch_pad); + OpRegReg(kOpCmp, rl_idx.low_reg, reg_max); + FreeTemp(reg_max); + OpCondBranch(kCondCc, launch_pad); + } + reg_off = AllocTemp(); + reg_ptr = AllocTemp(); + LoadWordDisp(rl_obj.low_reg, offset_offset, reg_off); + LoadWordDisp(rl_obj.low_reg, value_offset, reg_ptr); + } + OpRegImm(kOpAdd, reg_ptr, data_offset); + OpRegReg(kOpAdd, reg_off, rl_idx.low_reg); + FreeTemp(rl_obj.low_reg); + FreeTemp(rl_idx.low_reg); + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + LoadBaseIndexed(reg_ptr, reg_off, rl_result.low_reg, 1, kUnsignedHalf); + FreeTemp(reg_off); + FreeTemp(reg_ptr); + StoreValue(rl_dest, rl_result); + if (range_check) { + launch_pad->operands[2] = 0; // no resumption + } + // Record that we've already inlined & null checked + info->opt_flags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK); + return true; +} + +// Generates an inlined String.is_empty or String.length. 
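+// Both intrinsics load the String count field; for is_empty the count is then
+// turned into a 0/1 value without a branch: on Thumb2 via the neg/adc pair
+// below, and on x86 via (count - 1) >> 31 (logical), where the subtraction
+// wraps to 0xFFFFFFFF only when count == 0.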
+bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) +{ + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + // dst = src.length(); + RegLocation rl_obj = info->args[0]; + rl_obj = LoadValue(rl_obj, kCoreReg); + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + GenNullCheck(rl_obj.s_reg_low, rl_obj.low_reg, info->opt_flags); + LoadWordDisp(rl_obj.low_reg, mirror::String::CountOffset().Int32Value(), rl_result.low_reg); + if (is_empty) { + // dst = (dst == 0); + if (cu_->instruction_set == kThumb2) { + int t_reg = AllocTemp(); + OpRegReg(kOpNeg, t_reg, rl_result.low_reg); + OpRegRegReg(kOpAdc, rl_result.low_reg, rl_result.low_reg, t_reg); + } else { + DCHECK_EQ(cu_->instruction_set, kX86); + OpRegImm(kOpSub, rl_result.low_reg, 1); + OpRegImm(kOpLsr, rl_result.low_reg, 31); + } + } + StoreValue(rl_dest, rl_result); + return true; +} + +bool Mir2Lir::GenInlinedAbsInt(CallInfo* info) +{ + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + RegLocation rl_src = info->args[0]; + rl_src = LoadValue(rl_src, kCoreReg); + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + int sign_reg = AllocTemp(); + // abs(x) = y<=x>>31, (x+y)^y. + OpRegRegImm(kOpAsr, sign_reg, rl_src.low_reg, 31); + OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src.low_reg, sign_reg); + OpRegReg(kOpXor, rl_result.low_reg, sign_reg); + StoreValue(rl_dest, rl_result); + return true; +} + +bool Mir2Lir::GenInlinedAbsLong(CallInfo* info) +{ + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + if (cu_->instruction_set == kThumb2) { + RegLocation rl_src = info->args[0]; + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_dest = InlineTargetWide(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + int sign_reg = AllocTemp(); + // abs(x) = y<=x>>31, (x+y)^y. + OpRegRegImm(kOpAsr, sign_reg, rl_src.high_reg, 31); + OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src.low_reg, sign_reg); + OpRegRegReg(kOpAdc, rl_result.high_reg, rl_src.high_reg, sign_reg); + OpRegReg(kOpXor, rl_result.low_reg, sign_reg); + OpRegReg(kOpXor, rl_result.high_reg, sign_reg); + StoreValueWide(rl_dest, rl_result); + return true; + } else { + DCHECK_EQ(cu_->instruction_set, kX86); + // Reuse source registers to avoid running out of temps + RegLocation rl_src = info->args[0]; + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_dest = InlineTargetWide(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegCopyWide(rl_result.low_reg, rl_result.high_reg, rl_src.low_reg, rl_src.high_reg); + FreeTemp(rl_src.low_reg); + FreeTemp(rl_src.high_reg); + int sign_reg = AllocTemp(); + // abs(x) = y<=x>>31, (x+y)^y. 
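+ // With y = x >> 31 (arithmetic), y is 0 for non-negative x and -1 otherwise,
+ // so (x + y) ^ y == x when x >= 0 and == -x when x < 0; e.g. x = -5 gives
+ // y = -1, x + y = -6, -6 ^ -1 = 5. In this wide version the sign comes from
+ // the high word and the low-word add carries into the high word via adc.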
+ OpRegRegImm(kOpAsr, sign_reg, rl_result.high_reg, 31); + OpRegReg(kOpAdd, rl_result.low_reg, sign_reg); + OpRegReg(kOpAdc, rl_result.high_reg, sign_reg); + OpRegReg(kOpXor, rl_result.low_reg, sign_reg); + OpRegReg(kOpXor, rl_result.high_reg, sign_reg); + StoreValueWide(rl_dest, rl_result); + return true; + } +} + +bool Mir2Lir::GenInlinedFloatCvt(CallInfo* info) +{ + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + RegLocation rl_src = info->args[0]; + RegLocation rl_dest = InlineTarget(info); + StoreValue(rl_dest, rl_src); + return true; +} + +bool Mir2Lir::GenInlinedDoubleCvt(CallInfo* info) +{ + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + RegLocation rl_src = info->args[0]; + RegLocation rl_dest = InlineTargetWide(info); + StoreValueWide(rl_dest, rl_src); + return true; +} + +/* + * Fast string.index_of(I) & (II). Tests for simple case of char <= 0xffff, + * otherwise bails to standard library code. + */ +bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) +{ + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + ClobberCalleeSave(); + LockCallTemps(); // Using fixed registers + int reg_ptr = TargetReg(kArg0); + int reg_char = TargetReg(kArg1); + int reg_start = TargetReg(kArg2); + + RegLocation rl_obj = info->args[0]; + RegLocation rl_char = info->args[1]; + RegLocation rl_start = info->args[2]; + LoadValueDirectFixed(rl_obj, reg_ptr); + LoadValueDirectFixed(rl_char, reg_char); + if (zero_based) { + LoadConstant(reg_start, 0); + } else { + LoadValueDirectFixed(rl_start, reg_start); + } + int r_tgt = (cu_->instruction_set != kX86) ? LoadHelper(ENTRYPOINT_OFFSET(pIndexOf)) : 0; + GenNullCheck(rl_obj.s_reg_low, reg_ptr, info->opt_flags); + LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + intrinsic_launchpads_.Insert(launch_pad); + OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, launch_pad); + // NOTE: not a safepoint + if (cu_->instruction_set != kX86) { + OpReg(kOpBlx, r_tgt); + } else { + OpThreadMem(kOpBlx, ENTRYPOINT_OFFSET(pIndexOf)); + } + LIR* resume_tgt = NewLIR0(kPseudoTargetLabel); + launch_pad->operands[2] = reinterpret_cast<uintptr_t>(resume_tgt); + // Record that we've already inlined & null checked + info->opt_flags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK); + RegLocation rl_return = GetReturn(false); + RegLocation rl_dest = InlineTarget(info); + StoreValue(rl_dest, rl_return); + return true; +} + +/* Fast string.compareTo(Ljava/lang/string;)I. */ +bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) +{ + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + ClobberCalleeSave(); + LockCallTemps(); // Using fixed registers + int reg_this = TargetReg(kArg0); + int reg_cmp = TargetReg(kArg1); + + RegLocation rl_this = info->args[0]; + RegLocation rl_cmp = info->args[1]; + LoadValueDirectFixed(rl_this, reg_this); + LoadValueDirectFixed(rl_cmp, reg_cmp); + int r_tgt = (cu_->instruction_set != kX86) ? 
+ LoadHelper(ENTRYPOINT_OFFSET(pStringCompareTo)) : 0; + GenNullCheck(rl_this.s_reg_low, reg_this, info->opt_flags); + //TUNING: check if rl_cmp.s_reg_low is already null checked + LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + intrinsic_launchpads_.Insert(launch_pad); + OpCmpImmBranch(kCondEq, reg_cmp, 0, launch_pad); + // NOTE: not a safepoint + if (cu_->instruction_set != kX86) { + OpReg(kOpBlx, r_tgt); + } else { + OpThreadMem(kOpBlx, ENTRYPOINT_OFFSET(pStringCompareTo)); + } + launch_pad->operands[2] = 0; // No return possible + // Record that we've already inlined & null checked + info->opt_flags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK); + RegLocation rl_return = GetReturn(false); + RegLocation rl_dest = InlineTarget(info); + StoreValue(rl_dest, rl_return); + return true; +} + +bool Mir2Lir::GenInlinedCurrentThread(CallInfo* info) { + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + int offset = Thread::PeerOffset().Int32Value(); + if (cu_->instruction_set == kThumb2 || cu_->instruction_set == kMips) { + LoadWordDisp(TargetReg(kSelf), offset, rl_result.low_reg); + } else { + CHECK(cu_->instruction_set == kX86); + ((X86Mir2Lir*)this)->OpRegThreadMem(kOpMov, rl_result.low_reg, offset); + } + StoreValue(rl_dest, rl_result); + return true; +} + +bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info, + bool is_long, bool is_volatile) { + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + // Unused - RegLocation rl_src_unsafe = info->args[0]; + RegLocation rl_src_obj = info->args[1]; // Object + RegLocation rl_src_offset = info->args[2]; // long low + rl_src_offset.wide = 0; // ignore high half in info->args[3] + RegLocation rl_dest = InlineTarget(info); // result reg + if (is_volatile) { + GenMemBarrier(kLoadLoad); + } + RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (is_long) { + OpRegReg(kOpAdd, rl_object.low_reg, rl_offset.low_reg); + LoadBaseDispWide(rl_object.low_reg, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG); + StoreValueWide(rl_dest, rl_result); + } else { + LoadBaseIndexed(rl_object.low_reg, rl_offset.low_reg, rl_result.low_reg, 0, kWord); + StoreValue(rl_dest, rl_result); + } + return true; +} + +bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, + bool is_object, bool is_volatile, bool is_ordered) { + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + if (cu_->instruction_set == kX86 && is_object) { + // TODO: fix X86, it exhausts registers for card marking. 
+ return false;
+ }
+ // Unused - RegLocation rl_src_unsafe = info->args[0];
+ RegLocation rl_src_obj = info->args[1]; // Object
+ RegLocation rl_src_offset = info->args[2]; // long low
+ rl_src_offset.wide = 0; // ignore high half in info->args[3]
+ RegLocation rl_src_value = info->args[4]; // value to store
+ if (is_volatile || is_ordered) {
+ GenMemBarrier(kStoreStore);
+ }
+ RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+ RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
+ RegLocation rl_value;
+ if (is_long) {
+ rl_value = LoadValueWide(rl_src_value, kCoreReg);
+ OpRegReg(kOpAdd, rl_object.low_reg, rl_offset.low_reg);
+ StoreBaseDispWide(rl_object.low_reg, 0, rl_value.low_reg, rl_value.high_reg);
+ } else {
+ rl_value = LoadValue(rl_src_value, kCoreReg);
+ StoreBaseIndexed(rl_object.low_reg, rl_offset.low_reg, rl_value.low_reg, 0, kWord);
+ }
+ if (is_volatile) {
+ GenMemBarrier(kStoreLoad);
+ }
+ if (is_object) {
+ MarkGCCard(rl_value.low_reg, rl_object.low_reg);
+ }
+ return true;
+}
+
+bool Mir2Lir::GenIntrinsic(CallInfo* info)
+{
+ if (info->opt_flags & MIR_INLINED) {
+ return false;
+ }
+ /*
+ * TODO: move these to a target-specific structured constant array
+ * and use a generic match function. The list of intrinsics may be
+ * slightly different depending on target.
+ * TODO: Fold this into a matching function that runs during
+ * basic block building. This should be part of the action for
+ * small method inlining and recognition of the special object init
+ * method. By doing this during basic block construction, we can also
+ * take advantage of/generate new useful dataflow info.
+ */
+ StringPiece tgt_methods_declaring_class(
+ cu_->dex_file->GetMethodDeclaringClassDescriptor(cu_->dex_file->GetMethodId(info->index)));
+ if (tgt_methods_declaring_class.starts_with("Ljava/lang/Double;")) {
+ std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
+ if (tgt_method == "long java.lang.Double.doubleToRawLongBits(double)") {
+ return GenInlinedDoubleCvt(info);
+ }
+ if (tgt_method == "double java.lang.Double.longBitsToDouble(long)") {
+ return GenInlinedDoubleCvt(info);
+ }
+ } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Float;")) {
+ std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
+ if (tgt_method == "int java.lang.Float.floatToRawIntBits(float)") {
+ return GenInlinedFloatCvt(info);
+ }
+ if (tgt_method == "float java.lang.Float.intBitsToFloat(int)") {
+ return GenInlinedFloatCvt(info);
+ }
+ } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Math;") ||
+ tgt_methods_declaring_class.starts_with("Ljava/lang/StrictMath;")) {
+ std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
+ if (tgt_method == "int java.lang.Math.abs(int)" ||
+ tgt_method == "int java.lang.StrictMath.abs(int)") {
+ return GenInlinedAbsInt(info);
+ }
+ if (tgt_method == "long java.lang.Math.abs(long)" ||
+ tgt_method == "long java.lang.StrictMath.abs(long)") {
+ return GenInlinedAbsLong(info);
+ }
+ if (tgt_method == "int java.lang.Math.max(int, int)" ||
+ tgt_method == "int java.lang.StrictMath.max(int, int)") {
+ return GenInlinedMinMaxInt(info, false /* is_min */);
+ }
+ if (tgt_method == "int java.lang.Math.min(int, int)" ||
+ tgt_method == "int java.lang.StrictMath.min(int, int)") {
+ return GenInlinedMinMaxInt(info, true /* is_min */);
+ }
+ if (tgt_method == "double java.lang.Math.sqrt(double)" ||
+ tgt_method == "double java.lang.StrictMath.sqrt(double)") {
+ return GenInlinedSqrt(info);
+ }
+ } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/String;")) {
+ std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
+ if (tgt_method == "char java.lang.String.charAt(int)") {
+ return GenInlinedCharAt(info);
+ }
+ if (tgt_method == "int java.lang.String.compareTo(java.lang.String)") {
+ return GenInlinedStringCompareTo(info);
+ }
+ if (tgt_method == "boolean java.lang.String.isEmpty()") {
+ return GenInlinedStringIsEmptyOrLength(info, true /* is_empty */);
+ }
+ if (tgt_method == "int java.lang.String.indexOf(int, int)") {
+ return GenInlinedIndexOf(info, false /* base 0 */);
+ }
+ if (tgt_method == "int java.lang.String.indexOf(int)") {
+ return GenInlinedIndexOf(info, true /* base 0 */);
+ }
+ if (tgt_method == "int java.lang.String.length()") {
+ return GenInlinedStringIsEmptyOrLength(info, false /* is_empty */);
+ }
+ } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Thread;")) {
+ std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
+ if (tgt_method == "java.lang.Thread java.lang.Thread.currentThread()") {
+ return GenInlinedCurrentThread(info);
+ }
+ } else if (tgt_methods_declaring_class.starts_with("Lsun/misc/Unsafe;")) {
+ std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
+ if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
+ return GenInlinedCas32(info, false);
+ }
+ if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") {
+ return GenInlinedCas32(info, true);
+ }
+ if (tgt_method == "int sun.misc.Unsafe.getInt(java.lang.Object, long)") {
+ return GenInlinedUnsafeGet(info, false /* is_long */, false /* is_volatile */);
+ }
+ if (tgt_method == "int sun.misc.Unsafe.getIntVolatile(java.lang.Object, long)") {
+ return GenInlinedUnsafeGet(info, false /* is_long */, true /* is_volatile */);
+ }
+ if (tgt_method == "void sun.misc.Unsafe.putInt(java.lang.Object, long, int)") {
+ return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
+ false /* is_volatile */, false /* is_ordered */);
+ }
+ if (tgt_method == "void sun.misc.Unsafe.putIntVolatile(java.lang.Object, long, int)") {
+ return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
+ true /* is_volatile */, false /* is_ordered */);
+ }
+ if (tgt_method == "void sun.misc.Unsafe.putOrderedInt(java.lang.Object, long, int)") {
+ return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
+ false /* is_volatile */, true /* is_ordered */);
+ }
+ if (tgt_method == "long sun.misc.Unsafe.getLong(java.lang.Object, long)") {
+ return GenInlinedUnsafeGet(info, true /* is_long */, false /* is_volatile */);
+ }
+ if (tgt_method == "long sun.misc.Unsafe.getLongVolatile(java.lang.Object, long)") {
+ return GenInlinedUnsafeGet(info, true /* is_long */, true /* is_volatile */);
+ }
+ if (tgt_method == "void sun.misc.Unsafe.putLong(java.lang.Object, long, long)") {
+ return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
+ false /* is_volatile */, false /* is_ordered */);
+ }
+ if (tgt_method == "void sun.misc.Unsafe.putLongVolatile(java.lang.Object, long, long)") {
+ return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
+ true /* is_volatile */, false /* is_ordered */);
+ }
+ if (tgt_method == "void sun.misc.Unsafe.putOrderedLong(java.lang.Object, long, long)") {
+ return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
+ false /* is_volatile */, true
/* is_ordered */); + } + if (tgt_method == "java.lang.Object sun.misc.Unsafe.getObject(java.lang.Object, long)") { + return GenInlinedUnsafeGet(info, false /* is_long */, false /* is_volatile */); + } + if (tgt_method == "java.lang.Object sun.misc.Unsafe.getObjectVolatile(java.lang.Object, long)") { + return GenInlinedUnsafeGet(info, false /* is_long */, true /* is_volatile */); + } + if (tgt_method == "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") { + return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */, + false /* is_volatile */, false /* is_ordered */); + } + if (tgt_method == "void sun.misc.Unsafe.putObjectVolatile(java.lang.Object, long, java.lang.Object)") { + return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */, + true /* is_volatile */, false /* is_ordered */); + } + if (tgt_method == "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)") { + return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */, + false /* is_volatile */, true /* is_ordered */); + } + } + return false; +} + +void Mir2Lir::GenInvoke(CallInfo* info) +{ + if (GenIntrinsic(info)) { + return; + } + InvokeType original_type = info->type; // avoiding mutation by ComputeInvokeInfo + int call_state = 0; + LIR* null_ck; + LIR** p_null_ck = NULL; + NextCallInsn next_call_insn; + FlushAllRegs(); /* Everything to home location */ + // Explicit register usage + LockCallTemps(); + + DexCompilationUnit* cUnit = mir_graph_->GetCurrentDexCompilationUnit(); + MethodReference target_method(cUnit->GetDexFile(), info->index); + int vtable_idx; + uintptr_t direct_code; + uintptr_t direct_method; + bool skip_this; + bool fast_path = + cu_->compiler_driver->ComputeInvokeInfo(mir_graph_->GetCurrentDexCompilationUnit(), + current_dalvik_offset_, + info->type, target_method, + vtable_idx, + direct_code, direct_method, + true) && !SLOW_INVOKE_PATH; + if (info->type == kInterface) { + if (fast_path) { + p_null_ck = &null_ck; + } + next_call_insn = fast_path ? NextInterfaceCallInsn : NextInterfaceCallInsnWithAccessCheck; + skip_this = false; + } else if (info->type == kDirect) { + if (fast_path) { + p_null_ck = &null_ck; + } + next_call_insn = fast_path ? NextSDCallInsn : NextDirectCallInsnSP; + skip_this = false; + } else if (info->type == kStatic) { + next_call_insn = fast_path ? NextSDCallInsn : NextStaticCallInsnSP; + skip_this = false; + } else if (info->type == kSuper) { + DCHECK(!fast_path); // Fast path is a direct call. + next_call_insn = NextSuperCallInsnSP; + skip_this = false; + } else { + DCHECK_EQ(info->type, kVirtual); + next_call_insn = fast_path ? 
NextVCallInsn : NextVCallInsnSP; + skip_this = fast_path; + } + if (!info->is_range) { + call_state = GenDalvikArgsNoRange(info, call_state, p_null_ck, + next_call_insn, target_method, + vtable_idx, direct_code, direct_method, + original_type, skip_this); + } else { + call_state = GenDalvikArgsRange(info, call_state, p_null_ck, + next_call_insn, target_method, vtable_idx, + direct_code, direct_method, original_type, + skip_this); + } + // Finish up any of the call sequence not interleaved in arg loading + while (call_state >= 0) { + call_state = next_call_insn(cu_, info, call_state, target_method, + vtable_idx, direct_code, direct_method, + original_type); + } + LIR* call_inst; + if (cu_->instruction_set != kX86) { + call_inst = OpReg(kOpBlx, TargetReg(kInvokeTgt)); + } else { + if (fast_path && info->type != kInterface) { + call_inst = OpMem(kOpBlx, TargetReg(kArg0), + mirror::AbstractMethod::GetEntryPointFromCompiledCodeOffset().Int32Value()); + } else { + int trampoline = 0; + switch (info->type) { + case kInterface: + trampoline = fast_path ? ENTRYPOINT_OFFSET(pInvokeInterfaceTrampoline) + : ENTRYPOINT_OFFSET(pInvokeInterfaceTrampolineWithAccessCheck); + break; + case kDirect: + trampoline = ENTRYPOINT_OFFSET(pInvokeDirectTrampolineWithAccessCheck); + break; + case kStatic: + trampoline = ENTRYPOINT_OFFSET(pInvokeStaticTrampolineWithAccessCheck); + break; + case kSuper: + trampoline = ENTRYPOINT_OFFSET(pInvokeSuperTrampolineWithAccessCheck); + break; + case kVirtual: + trampoline = ENTRYPOINT_OFFSET(pInvokeVirtualTrampolineWithAccessCheck); + break; + default: + LOG(FATAL) << "Unexpected invoke type"; + } + call_inst = OpThreadMem(kOpBlx, trampoline); + } + } + MarkSafepointPC(call_inst); + + ClobberCalleeSave(); + if (info->result.location != kLocInvalid) { + // We have a following MOVE_RESULT - do it now. + if (info->result.wide) { + RegLocation ret_loc = GetReturnWide(info->result.fp); + StoreValueWide(info->result, ret_loc); + } else { + RegLocation ret_loc = GetReturn(info->result.fp); + StoreValue(info->result, ret_loc); + } + } +} + +} // namespace art diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc new file mode 100644 index 0000000000..6a25c1db45 --- /dev/null +++ b/compiler/dex/quick/gen_loadstore.cc @@ -0,0 +1,312 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex/compiler_ir.h" +#include "dex/compiler_internals.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "invoke_type.h" + +namespace art { + +/* This file contains target-independent codegen and support. */ + +/* + * Load an immediate value into a fixed or temp register. Target + * register is clobbered, and marked in_use. + */ +LIR* Mir2Lir::LoadConstant(int r_dest, int value) +{ + if (IsTemp(r_dest)) { + Clobber(r_dest); + MarkInUse(r_dest); + } + return LoadConstantNoClobber(r_dest, value); +} + +/* + * Temporary workaround for Issue 7250540. 
If we're loading a constant zero into a + * promoted floating point register, also copy a zero into the int/ref identity of + * that sreg. + */ +void Mir2Lir::Workaround7250540(RegLocation rl_dest, int zero_reg) +{ + if (rl_dest.fp) { + int pmap_index = SRegToPMap(rl_dest.s_reg_low); + if (promotion_map_[pmap_index].fp_location == kLocPhysReg) { + // Now, determine if this vreg is ever used as a reference. If not, we're done. + bool used_as_reference = false; + int base_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low); + for (int i = 0; !used_as_reference && (i < mir_graph_->GetNumSSARegs()); i++) { + if (mir_graph_->SRegToVReg(mir_graph_->reg_location_[i].s_reg_low) == base_vreg) { + used_as_reference |= mir_graph_->reg_location_[i].ref; + } + } + if (!used_as_reference) { + return; + } + int temp_reg = zero_reg; + if (temp_reg == INVALID_REG) { + temp_reg = AllocTemp(); + LoadConstant(temp_reg, 0); + } + if (promotion_map_[pmap_index].core_location == kLocPhysReg) { + // Promoted - just copy in a zero + OpRegCopy(promotion_map_[pmap_index].core_reg, temp_reg); + } else { + // Lives in the frame, need to store. + StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, kWord); + } + if (zero_reg == INVALID_REG) { + FreeTemp(temp_reg); + } + } + } +} + +/* Load a word at base + displacement. Displacement must be word multiple */ +LIR* Mir2Lir::LoadWordDisp(int rBase, int displacement, int r_dest) +{ + return LoadBaseDisp(rBase, displacement, r_dest, kWord, + INVALID_SREG); +} + +LIR* Mir2Lir::StoreWordDisp(int rBase, int displacement, int r_src) +{ + return StoreBaseDisp(rBase, displacement, r_src, kWord); +} + +/* + * Load a Dalvik register into a physical register. Take care when + * using this routine, as it doesn't perform any bookkeeping regarding + * register liveness. That is the responsibility of the caller. + */ +void Mir2Lir::LoadValueDirect(RegLocation rl_src, int r_dest) +{ + rl_src = UpdateLoc(rl_src); + if (rl_src.location == kLocPhysReg) { + OpRegCopy(r_dest, rl_src.low_reg); + } else if (IsInexpensiveConstant(rl_src)) { + LoadConstantNoClobber(r_dest, mir_graph_->ConstantValue(rl_src)); + } else { + DCHECK((rl_src.location == kLocDalvikFrame) || + (rl_src.location == kLocCompilerTemp)); + LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest); + } +} + +/* + * Similar to LoadValueDirect, but clobbers and allocates the target + * register. Should be used when loading to a fixed register (for example, + * loading arguments to an out of line call. + */ +void Mir2Lir::LoadValueDirectFixed(RegLocation rl_src, int r_dest) +{ + Clobber(r_dest); + MarkInUse(r_dest); + LoadValueDirect(rl_src, r_dest); +} + +/* + * Load a Dalvik register pair into a physical register[s]. Take care when + * using this routine, as it doesn't perform any bookkeeping regarding + * register liveness. That is the responsibility of the caller. + */ +void Mir2Lir::LoadValueDirectWide(RegLocation rl_src, int reg_lo, + int reg_hi) +{ + rl_src = UpdateLocWide(rl_src); + if (rl_src.location == kLocPhysReg) { + OpRegCopyWide(reg_lo, reg_hi, rl_src.low_reg, rl_src.high_reg); + } else if (IsInexpensiveConstant(rl_src)) { + LoadConstantWide(reg_lo, reg_hi, mir_graph_->ConstantValueWide(rl_src)); + } else { + DCHECK((rl_src.location == kLocDalvikFrame) || + (rl_src.location == kLocCompilerTemp)); + LoadBaseDispWide(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), + reg_lo, reg_hi, INVALID_SREG); + } +} + +/* + * Similar to LoadValueDirect, but clobbers and allocates the target + * registers. 
Should be used when loading to a fixed registers (for example, + * loading arguments to an out of line call. + */ +void Mir2Lir::LoadValueDirectWideFixed(RegLocation rl_src, int reg_lo, + int reg_hi) +{ + Clobber(reg_lo); + Clobber(reg_hi); + MarkInUse(reg_lo); + MarkInUse(reg_hi); + LoadValueDirectWide(rl_src, reg_lo, reg_hi); +} + +RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) +{ + rl_src = EvalLoc(rl_src, op_kind, false); + if (IsInexpensiveConstant(rl_src) || rl_src.location != kLocPhysReg) { + LoadValueDirect(rl_src, rl_src.low_reg); + rl_src.location = kLocPhysReg; + MarkLive(rl_src.low_reg, rl_src.s_reg_low); + } + return rl_src; +} + +void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) +{ + /* + * Sanity checking - should never try to store to the same + * ssa name during the compilation of a single instruction + * without an intervening ClobberSReg(). + */ + if (kIsDebugBuild) { + DCHECK((live_sreg_ == INVALID_SREG) || + (rl_dest.s_reg_low != live_sreg_)); + live_sreg_ = rl_dest.s_reg_low; + } + LIR* def_start; + LIR* def_end; + DCHECK(!rl_dest.wide); + DCHECK(!rl_src.wide); + rl_src = UpdateLoc(rl_src); + rl_dest = UpdateLoc(rl_dest); + if (rl_src.location == kLocPhysReg) { + if (IsLive(rl_src.low_reg) || + IsPromoted(rl_src.low_reg) || + (rl_dest.location == kLocPhysReg)) { + // Src is live/promoted or Dest has assigned reg. + rl_dest = EvalLoc(rl_dest, kAnyReg, false); + OpRegCopy(rl_dest.low_reg, rl_src.low_reg); + } else { + // Just re-assign the registers. Dest gets Src's regs + rl_dest.low_reg = rl_src.low_reg; + Clobber(rl_src.low_reg); + } + } else { + // Load Src either into promoted Dest or temps allocated for Dest + rl_dest = EvalLoc(rl_dest, kAnyReg, false); + LoadValueDirect(rl_src, rl_dest.low_reg); + } + + // Dest is now live and dirty (until/if we flush it to home location) + MarkLive(rl_dest.low_reg, rl_dest.s_reg_low); + MarkDirty(rl_dest); + + + ResetDefLoc(rl_dest); + if (IsDirty(rl_dest.low_reg) && + oat_live_out(rl_dest.s_reg_low)) { + def_start = last_lir_insn_; + StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), + rl_dest.low_reg, kWord); + MarkClean(rl_dest); + def_end = last_lir_insn_; + if (!rl_dest.ref) { + // Exclude references from store elimination + MarkDef(rl_dest, def_start, def_end); + } + } +} + +RegLocation Mir2Lir::LoadValueWide(RegLocation rl_src, RegisterClass op_kind) +{ + DCHECK(rl_src.wide); + rl_src = EvalLoc(rl_src, op_kind, false); + if (IsInexpensiveConstant(rl_src) || rl_src.location != kLocPhysReg) { + LoadValueDirectWide(rl_src, rl_src.low_reg, rl_src.high_reg); + rl_src.location = kLocPhysReg; + MarkLive(rl_src.low_reg, rl_src.s_reg_low); + MarkLive(rl_src.high_reg, GetSRegHi(rl_src.s_reg_low)); + } + return rl_src; +} + +void Mir2Lir::StoreValueWide(RegLocation rl_dest, RegLocation rl_src) +{ + /* + * Sanity checking - should never try to store to the same + * ssa name during the compilation of a single instruction + * without an intervening ClobberSReg(). 
+ */ + if (kIsDebugBuild) { + DCHECK((live_sreg_ == INVALID_SREG) || + (rl_dest.s_reg_low != live_sreg_)); + live_sreg_ = rl_dest.s_reg_low; + } + LIR* def_start; + LIR* def_end; + DCHECK_EQ(IsFpReg(rl_src.low_reg), IsFpReg(rl_src.high_reg)); + DCHECK(rl_dest.wide); + DCHECK(rl_src.wide); + if (rl_src.location == kLocPhysReg) { + if (IsLive(rl_src.low_reg) || + IsLive(rl_src.high_reg) || + IsPromoted(rl_src.low_reg) || + IsPromoted(rl_src.high_reg) || + (rl_dest.location == kLocPhysReg)) { + // Src is live or promoted or Dest has assigned reg. + rl_dest = EvalLoc(rl_dest, kAnyReg, false); + OpRegCopyWide(rl_dest.low_reg, rl_dest.high_reg, + rl_src.low_reg, rl_src.high_reg); + } else { + // Just re-assign the registers. Dest gets Src's regs + rl_dest.low_reg = rl_src.low_reg; + rl_dest.high_reg = rl_src.high_reg; + Clobber(rl_src.low_reg); + Clobber(rl_src.high_reg); + } + } else { + // Load Src either into promoted Dest or temps allocated for Dest + rl_dest = EvalLoc(rl_dest, kAnyReg, false); + LoadValueDirectWide(rl_src, rl_dest.low_reg, rl_dest.high_reg); + } + + // Dest is now live and dirty (until/if we flush it to home location) + MarkLive(rl_dest.low_reg, rl_dest.s_reg_low); + MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low)); + MarkDirty(rl_dest); + MarkPair(rl_dest.low_reg, rl_dest.high_reg); + + + ResetDefLocWide(rl_dest); + if ((IsDirty(rl_dest.low_reg) || + IsDirty(rl_dest.high_reg)) && + (oat_live_out(rl_dest.s_reg_low) || + oat_live_out(GetSRegHi(rl_dest.s_reg_low)))) { + def_start = last_lir_insn_; + DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1), + mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low))); + StoreBaseDispWide(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), + rl_dest.low_reg, rl_dest.high_reg); + MarkClean(rl_dest); + def_end = last_lir_insn_; + MarkDefWide(rl_dest, def_start, def_end); + } +} + +/* Utilities to load the current Method* */ +void Mir2Lir::LoadCurrMethodDirect(int r_tgt) +{ + LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt); +} + +RegLocation Mir2Lir::LoadCurrMethod() +{ + return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg); +} + +} // namespace art diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc new file mode 100644 index 0000000000..ac654d8f21 --- /dev/null +++ b/compiler/dex/quick/local_optimizations.cc @@ -0,0 +1,499 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "dex/compiler_internals.h"
+
+namespace art {
+
+#define DEBUG_OPT(X)
+
+/* Check RAW, WAR, and WAW dependency on the register operands */
+#define CHECK_REG_DEP(use, def, check) ((def & check->use_mask) || \
+ ((use | def) & check->def_mask))
+
+/* Scheduler heuristics */
+#define MAX_HOIST_DISTANCE 20
+#define LDLD_DISTANCE 4
+#define LD_LATENCY 2
+
+static bool IsDalvikRegisterClobbered(LIR* lir1, LIR* lir2)
+{
+ int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->alias_info);
+ int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->alias_info);
+ int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->alias_info);
+ int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->alias_info);
+
+ return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
+}
+
+/* Convert a more expensive instruction (i.e. a load) into a move */
+void Mir2Lir::ConvertMemOpIntoMove(LIR* orig_lir, int dest, int src)
+{
+ /* Insert a move to replace the load */
+ LIR* move_lir;
+ move_lir = OpRegCopyNoInsert(dest, src);
+ /*
+ * Insert the converted instruction after the original since the
+ * optimization is scanning in the top-down order and the new instruction
+ * will need to be re-checked (e.g. the new dest clobbers the src used in
+ * this_lir).
+ */
+ InsertLIRAfter(orig_lir, move_lir);
+}
+
+/*
+ * Perform a pass of top-down walk, from the second-last instruction in the
+ * superblock, to eliminate redundant loads and stores.
+ *
+ * An earlier load can eliminate a later load iff
+ * 1) They are must-aliases
+ * 2) The native register is not clobbered in between
+ * 3) The memory location is not written to in between
+ *
+ * An earlier store can eliminate a later load iff
+ * 1) They are must-aliases
+ * 2) The native register is not clobbered in between
+ * 3) The memory location is not written to in between
+ *
+ * A later store can be eliminated by an earlier store iff
+ * 1) They are must-aliases
+ * 2) The memory location is not written to in between
+ */
+void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir)
+{
+ LIR* this_lir;
+
+ if (head_lir == tail_lir) return;
+
+ for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) {
+
+ if (is_pseudo_opcode(this_lir->opcode)) continue;
+
+ int sink_distance = 0;
+
+ uint64_t target_flags = GetTargetInstFlags(this_lir->opcode);
+
+ /* Skip non-interesting instructions */
+ if ((this_lir->flags.is_nop == true) ||
+ (target_flags & IS_BRANCH) ||
+ ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) || // Skip wide loads.
+ ((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) ==
+ (REG_USE0 | REG_USE1 | REG_USE2)) || // Skip wide stores.
+ !(target_flags & (IS_LOAD | IS_STORE))) {
+ continue;
+ }
+
+ int native_reg_id;
+ if (cu_->instruction_set == kX86) {
+ // If x86, location differs depending on whether memory/reg operation.
+ native_reg_id = (GetTargetInstFlags(this_lir->opcode) & IS_STORE) ? this_lir->operands[2]
+ : this_lir->operands[0];
+ } else {
+ native_reg_id = this_lir->operands[0];
+ }
+ bool is_this_lir_load = GetTargetInstFlags(this_lir->opcode) & IS_LOAD;
+ LIR* check_lir;
+ /* Use the mem mask to determine the rough memory location */
+ uint64_t this_mem_mask = (this_lir->use_mask | this_lir->def_mask) & ENCODE_MEM;
+
+ /*
+ * Currently only eliminate redundant ld/st for constant and Dalvik
+ * register accesses.
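 * For illustration (sketch only, not exact LIR syntax): given
 *   ldr r0, [sp, #16]   ; load of Dalvik vreg v4
 *   ...                 ; nothing clobbers r0 or stores to v4
 *   ldr r1, [sp, #16]   ; redundant reload of v4
 * the scan below nops the second load, first inserting a "mov r1, r0"
 * via ConvertMemOpIntoMove() because the destination register differs.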
+ */ + if (!(this_mem_mask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) continue; + + uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM; + uint64_t stop_use_reg_mask; + if (cu_->instruction_set == kX86) { + stop_use_reg_mask = (IS_BRANCH | this_lir->use_mask) & ~ENCODE_MEM; + } else { + /* + * Add pc to the resource mask to prevent this instruction + * from sinking past branch instructions. Also take out the memory + * region bits since stop_mask is used to check data/control + * dependencies. + */ + stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->use_mask) & ~ENCODE_MEM; + } + + for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) { + + /* + * Skip already dead instructions (whose dataflow information is + * outdated and misleading). + */ + if (check_lir->flags.is_nop || is_pseudo_opcode(check_lir->opcode)) continue; + + uint64_t check_mem_mask = (check_lir->use_mask | check_lir->def_mask) & ENCODE_MEM; + uint64_t alias_condition = this_mem_mask & check_mem_mask; + bool stop_here = false; + + /* + * Potential aliases seen - check the alias relations + */ + uint64_t check_flags = GetTargetInstFlags(check_lir->opcode); + // TUNING: Support instructions with multiple register targets. + if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) { + stop_here = true; + } else if (check_mem_mask != ENCODE_MEM && alias_condition != 0) { + bool is_check_lir_load = check_flags & IS_LOAD; + if (alias_condition == ENCODE_LITERAL) { + /* + * Should only see literal loads in the instruction + * stream. + */ + DCHECK(!(check_flags & IS_STORE)); + /* Same value && same register type */ + if (check_lir->alias_info == this_lir->alias_info && + SameRegType(check_lir->operands[0], native_reg_id)) { + /* + * Different destination register - insert + * a move + */ + if (check_lir->operands[0] != native_reg_id) { + ConvertMemOpIntoMove(check_lir, check_lir->operands[0], native_reg_id); + } + check_lir->flags.is_nop = true; + } + } else if (alias_condition == ENCODE_DALVIK_REG) { + /* Must alias */ + if (check_lir->alias_info == this_lir->alias_info) { + /* Only optimize compatible registers */ + bool reg_compatible = SameRegType(check_lir->operands[0], native_reg_id); + if ((is_this_lir_load && is_check_lir_load) || + (!is_this_lir_load && is_check_lir_load)) { + /* RAR or RAW */ + if (reg_compatible) { + /* + * Different destination register - + * insert a move + */ + if (check_lir->operands[0] != + native_reg_id) { + ConvertMemOpIntoMove(check_lir, check_lir->operands[0], native_reg_id); + } + check_lir->flags.is_nop = true; + } else { + /* + * Destinaions are of different types - + * something complicated going on so + * stop looking now. + */ + stop_here = true; + } + } else if (is_this_lir_load && !is_check_lir_load) { + /* WAR - register value is killed */ + stop_here = true; + } else if (!is_this_lir_load && !is_check_lir_load) { + /* WAW - nuke the earlier store */ + this_lir->flags.is_nop = true; + stop_here = true; + } + /* Partial overlap */ + } else if (IsDalvikRegisterClobbered(this_lir, check_lir)) { + /* + * It is actually ok to continue if check_lir + * is a read. But it is hard to make a test + * case for this so we just stop here to be + * conservative. + */ + stop_here = true; + } + } + /* Memory content may be updated. Stop looking now. 
*/ + if (stop_here) { + break; + /* The check_lir has been transformed - check the next one */ + } else if (check_lir->flags.is_nop) { + continue; + } + } + + + /* + * this and check LIRs have no memory dependency. Now check if + * their register operands have any RAW, WAR, and WAW + * dependencies. If so, stop looking. + */ + if (stop_here == false) { + stop_here = CHECK_REG_DEP(stop_use_reg_mask, stop_def_reg_mask, check_lir); + } + + if (stop_here == true) { + if (cu_->instruction_set == kX86) { + // Prevent stores from being sunk between ops that generate ccodes and + // ops that use them. + uint64_t flags = GetTargetInstFlags(check_lir->opcode); + if (sink_distance > 0 && (flags & IS_BRANCH) && (flags & USES_CCODES)) { + check_lir = PREV_LIR(check_lir); + sink_distance--; + } + } + DEBUG_OPT(dump_dependent_insn_pair(this_lir, check_lir, "REG CLOBBERED")); + /* Only sink store instructions */ + if (sink_distance && !is_this_lir_load) { + LIR* new_store_lir = + static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocLIR)); + *new_store_lir = *this_lir; + /* + * Stop point found - insert *before* the check_lir + * since the instruction list is scanned in the + * top-down order. + */ + InsertLIRBefore(check_lir, new_store_lir); + this_lir->flags.is_nop = true; + } + break; + } else if (!check_lir->flags.is_nop) { + sink_distance++; + } + } + } +} + +/* + * Perform a pass of bottom-up walk, from the second instruction in the + * superblock, to try to hoist loads to earlier slots. + */ +void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) +{ + LIR* this_lir, *check_lir; + /* + * Store the list of independent instructions that can be hoisted past. + * Will decide the best place to insert later. + */ + LIR* prev_inst_list[MAX_HOIST_DISTANCE]; + + /* Empty block */ + if (head_lir == tail_lir) return; + + /* Start from the second instruction */ + for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) { + + if (is_pseudo_opcode(this_lir->opcode)) continue; + + uint64_t target_flags = GetTargetInstFlags(this_lir->opcode); + /* Skip non-interesting instructions */ + if ((this_lir->flags.is_nop == true) || + ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) || + !(target_flags & IS_LOAD)) { + continue; + } + + uint64_t stop_use_all_mask = this_lir->use_mask; + + if (cu_->instruction_set != kX86) { + /* + * Branches for null/range checks are marked with the true resource + * bits, and loads to Dalvik registers, constant pools, and non-alias + * locations are safe to be hoisted. So only mark the heap references + * conservatively here. + */ + if (stop_use_all_mask & ENCODE_HEAP_REF) { + stop_use_all_mask |= GetPCUseDefEncoding(); + } + } + + /* Similar as above, but just check for pure register dependency */ + uint64_t stop_use_reg_mask = stop_use_all_mask & ~ENCODE_MEM; + uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM; + + int next_slot = 0; + bool stop_here = false; + + /* Try to hoist the load to a good spot */ + for (check_lir = PREV_LIR(this_lir); check_lir != head_lir; check_lir = PREV_LIR(check_lir)) { + + /* + * Skip already dead instructions (whose dataflow information is + * outdated and misleading). 
+ */
+ if (check_lir->flags.is_nop) continue;
+
+ uint64_t check_mem_mask = check_lir->def_mask & ENCODE_MEM;
+ uint64_t alias_condition = stop_use_all_mask & check_mem_mask;
+ stop_here = false;
+
+ /* Potential WAR alias seen - check the exact relation */
+ if (check_mem_mask != ENCODE_MEM && alias_condition != 0) {
+ /* We can fully disambiguate Dalvik references */
+ if (alias_condition == ENCODE_DALVIK_REG) {
+ /* Must alias or partially overlap */
+ if ((check_lir->alias_info == this_lir->alias_info) ||
+ IsDalvikRegisterClobbered(this_lir, check_lir)) {
+ stop_here = true;
+ }
+ /* Conservatively treat all heap refs as may-alias */
+ } else {
+ DCHECK_EQ(alias_condition, ENCODE_HEAP_REF);
+ stop_here = true;
+ }
+ /* Memory content may be updated. Stop looking now. */
+ if (stop_here) {
+ prev_inst_list[next_slot++] = check_lir;
+ break;
+ }
+ }
+
+ if (stop_here == false) {
+ stop_here = CHECK_REG_DEP(stop_use_reg_mask, stop_def_reg_mask,
+ check_lir);
+ }
+
+ /*
+ * Store the dependent or non-pseudo/independent instruction to the
+ * list.
+ */
+ if (stop_here || !is_pseudo_opcode(check_lir->opcode)) {
+ prev_inst_list[next_slot++] = check_lir;
+ if (next_slot == MAX_HOIST_DISTANCE) break;
+ }
+
+ /* Found a new place to put the load - move it here */
+ if (stop_here == true) {
+ DEBUG_OPT(dump_dependent_insn_pair(check_lir, this_lir, "HOIST STOP"));
+ break;
+ }
+ }
+
+ /*
+ * Reached the top - use head_lir as the dependent marker as all labels
+ * are barriers.
+ */
+ if (stop_here == false && next_slot < MAX_HOIST_DISTANCE) {
+ prev_inst_list[next_slot++] = head_lir;
+ }
+
+ /*
+ * At least one independent instruction is found. Scan in the reversed
+ * direction to find a beneficial slot.
+ */
+ if (next_slot >= 2) {
+ int first_slot = next_slot - 2;
+ int slot;
+ LIR* dep_lir = prev_inst_list[next_slot-1];
+ /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */
+ if (!is_pseudo_opcode(dep_lir->opcode) &&
+ (GetTargetInstFlags(dep_lir->opcode) & IS_LOAD)) {
+ first_slot -= LDLD_DISTANCE;
+ }
+ /*
+ * Make sure we check slot >= 0 since first_slot may be negative
+ * when the loop is first entered.
+ */
+ for (slot = first_slot; slot >= 0; slot--) {
+ LIR* cur_lir = prev_inst_list[slot];
+ LIR* prev_lir = prev_inst_list[slot+1];
+
+ /* Check the highest instruction */
+ if (prev_lir->def_mask == ENCODE_ALL) {
+ /*
+ * If the first instruction is a load, don't hoist anything
+ * above it since it is unlikely to be beneficial.
+ */
+ if (GetTargetInstFlags(cur_lir->opcode) & IS_LOAD) continue;
+ /*
+ * If the remaining number of slots is less than LD_LATENCY,
+ * insert the hoisted load here.
+ */
+ if (slot < LD_LATENCY) break;
+ }
+
+ // Don't look across a barrier label
+ if ((prev_lir->opcode == kPseudoTargetLabel) ||
+ (prev_lir->opcode == kPseudoSafepointPC) ||
+ (prev_lir->opcode == kPseudoBarrier)) {
+ break;
+ }
+
+ /*
+ * Try to find two instructions with load/use dependency until
+ * the remaining instructions are less than LD_LATENCY.
+ */
+ bool prev_is_load = is_pseudo_opcode(prev_lir->opcode) ?
false : + (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD); + if (((cur_lir->use_mask & prev_lir->def_mask) && prev_is_load) || (slot < LD_LATENCY)) { + break; + } + } + + /* Found a slot to hoist to */ + if (slot >= 0) { + LIR* cur_lir = prev_inst_list[slot]; + LIR* new_load_lir = + static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocLIR)); + *new_load_lir = *this_lir; + /* + * Insertion is guaranteed to succeed since check_lir + * is never the first LIR on the list + */ + InsertLIRBefore(cur_lir, new_load_lir); + this_lir->flags.is_nop = true; + } + } + } +} + +void Mir2Lir::ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir) +{ + if (!(cu_->disable_opt & (1 << kLoadStoreElimination))) { + ApplyLoadStoreElimination(head_lir, tail_lir); + } + if (!(cu_->disable_opt & (1 << kLoadHoisting))) { + ApplyLoadHoisting(head_lir, tail_lir); + } +} + +/* + * Nop any unconditional branches that go to the next instruction. + * Note: new redundant branches may be inserted later, and we'll + * use a check in final instruction assembly to nop those out. + */ +void Mir2Lir::RemoveRedundantBranches() +{ + LIR* this_lir; + + for (this_lir = first_lir_insn_; this_lir != last_lir_insn_; this_lir = NEXT_LIR(this_lir)) { + + /* Branch to the next instruction */ + if (IsUnconditionalBranch(this_lir)) { + LIR* next_lir = this_lir; + + while (true) { + next_lir = NEXT_LIR(next_lir); + + /* + * Is the branch target the next instruction? + */ + if (next_lir == this_lir->target) { + this_lir->flags.is_nop = true; + break; + } + + /* + * Found real useful stuff between the branch and the target. + * Need to explicitly check the last_lir_insn_ here because it + * might be the last real instruction. + */ + if (!is_pseudo_opcode(next_lir->opcode) || + (next_lir == last_lir_insn_)) + break; + } + } + } +} + +} // namespace art diff --git a/compiler/dex/quick/mips/README.mips b/compiler/dex/quick/mips/README.mips new file mode 100644 index 0000000000..061c157256 --- /dev/null +++ b/compiler/dex/quick/mips/README.mips @@ -0,0 +1,57 @@ + Notes on the Mips target (3/4/2012) + ----------------------------------- + +Testing + +The initial implementation of Mips support in the compiler is untested on +actual hardware, and as such should be expected to have many bugs. However, +the vast majority of code for Mips support is either shared with other +tested targets, or was taken from the functional Mips JIT compiler. The +expectation is that when it is first tried out on actual hardware lots of +small bugs will be flushed out, but it should not take long to get it +solidly running. The following areas are considered most likely to have +problems that need to be addressed: + + o Endianness. Focus was on little-endian support, and if a big-endian + target is desired, you should pay particular attention to the + code generation for switch tables, fill array data, 64-bit + data handling and the register usage conventions. + + o The memory model. Verify that oatGenMemoryBarrier() generates the + appropriate flavor of sync. + +Register promotion + +The resource masks in the LIR structure are 64-bits wide, which is enough +room to fully describe def/use info for Arm and x86 instructions. However, +the larger number of MIPS core and float registers render this too small. +Currently, the workaround for this limitation is to avoid using floating +point registers 16-31. These are the callee-save registers, which therefore +means that no floating point promotion is allowed. 
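As a rough back-of-the-envelope check (the register counts and special bits
below are simplified stand-ins rather than ART's exact resource encoding), a
fully general per-register def/use encoding for MIPS would not fit in 64 bits:

    // Sketch only: counts are illustrative, not the actual mask layout.
    #include <cstdint>
    #include <cstdio>

    int main() {
      const int kCoreRegs = 32;     // r0-r31
      const int kFloatRegs = 32;    // f0-f31
      const int kSpecialBits = 4;   // kDalvikReg, kLiteral, kHeapRef, kMustNotAlias
      const int needed = kCoreRegs + kFloatRegs + kSpecialBits;
      std::printf("bits needed: %d, bits in a uint64_t mask: %zu\n",
                  needed, sizeof(uint64_t) * 8);  // 68 vs. 64
      return 0;
    }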
Among the solutions are:
+ o Expand the def/use mask (which, unfortunately, is a significant change)
+ o The Arm target uses 52 of the 64 bits, so we could support float
+ registers 16-27 without much effort.
+ o We could likely assign the 4 non-register bits (kDalvikReg, kLiteral,
+ kHeapRef & kMustNotAlias) to positions occupied by MIPS registers that
+ don't need def/use bits because they are never modified by code
+ subject to scheduling: r_K0, r_K1, r_SP, r_ZERO, r_S1 (rSELF).
+
+Branch delay slots
+
+Little to no attempt was made to fill branch delay slots. Branch
+instructions in the encoding map are given a length of 8 bytes to include
+an implicit NOP. It should not be too difficult to provide a slot-filling
+pass following successful assembly, but thought should be given to the
+design. Branches are currently treated as scheduling barriers. One
+simple solution would be to copy the instruction at branch targets to the
+slot and adjust the displacement. However, given that code expansion is
+already a problem it would be preferable to use a more sophisticated
+scheduling solution.
+
+Code expansion
+
+Code expansion for the MIPS target is significantly higher than we see
+for Arm and x86. It might make sense to replace the inline code generation
+for some of the more verbose Dalvik byte codes with subroutine calls to
+shared helper functions.
+
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
new file mode 100644
index 0000000000..2482aa4fbb
--- /dev/null
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -0,0 +1,716 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "codegen_mips.h"
+#include "dex/quick/mir_to_lir-inl.h"
+#include "mips_lir.h"
+
+namespace art {
+
+#define MAX_ASSEMBLER_RETRIES 50
+
+/*
+ * opcode: MipsOpCode enum
+ * skeleton: pre-designated bit-pattern for this opcode
+ * k0: key to applying ds/de
+ * ds: dest start bit position
+ * de: dest end bit position
+ * k1: key to applying s1s/s1e
+ * s1s: src1 start bit position
+ * s1e: src1 end bit position
+ * k2: key to applying s2s/s2e
+ * s2s: src2 start bit position
+ * s2e: src2 end bit position
+ * operands: number of operands (for sanity check purposes)
+ * name: mnemonic name
+ * fmt: for pretty-printing
+ */
+#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
+ k3, k3s, k3e, flags, name, fmt, size) \
+ {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
+ {k3, k3s, k3e}}, opcode, flags, name, fmt, size}
+
+/* Instruction dump string format keys: !pf, where "!" is the start
+ * of the key, "p" is which numeric operand to use and "f" is the
+ * print format.
+ *
+ * [p]ositions:
+ * 0 -> operands[0] (dest)
+ * 1 -> operands[1] (src1)
+ * 2 -> operands[2] (src2)
+ * 3 -> operands[3] (extra)
+ *
+ * [f]ormats:
+ * h -> 4-digit hex
+ * d -> decimal
+ * E -> decimal*4
+ * F -> decimal*2
+ * c -> branch condition (beq, bne, etc.)
+ * t -> pc-relative target + * T -> pc-region target + * u -> 1st half of bl[x] target + * v -> 2nd half ob bl[x] target + * R -> register list + * s -> single precision floating point register + * S -> double precision floating point register + * m -> Thumb2 modified immediate + * n -> complimented Thumb2 modified immediate + * M -> Thumb2 16-bit zero-extended immediate + * b -> 4-digit binary + * N -> append a NOP + * + * [!] escape. To insert "!", use "!!" + */ +/* NOTE: must be kept in sync with enum MipsOpcode from LIR.h */ +/* + * TUNING: We're currently punting on the branch delay slots. All branch + * instructions in this map are given a size of 8, which during assembly + * is expanded to include a nop. This scheme should be replaced with + * an assembler pass to fill those slots when possible. + */ +const MipsEncodingMap MipsMir2Lir::EncodingMap[kMipsLast] = { + ENCODING_MAP(kMips32BitData, 0x00000000, + kFmtBitBlt, 31, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP, + "data", "0x!0h(!0d)", 4), + ENCODING_MAP(kMipsAddiu, 0x24000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "addiu", "!0r,!1r,0x!2h(!2d)", 4), + ENCODING_MAP(kMipsAddu, 0x00000021, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "addu", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsAnd, 0x00000024, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "and", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsAndi, 0x30000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "andi", "!0r,!1r,0x!2h(!2d)", 4), + ENCODING_MAP(kMipsB, 0x10000000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, + "b", "!0t!0N", 8), + ENCODING_MAP(kMipsBal, 0x04110000, + kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | + NEEDS_FIXUP, "bal", "!0t!0N", 8), + ENCODING_MAP(kMipsBeq, 0x10000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_USE01 | + NEEDS_FIXUP, "beq", "!0r,!1r,!2t!0N", 8), + ENCODING_MAP(kMipsBeqz, 0x10000000, /* same as beq above with t = $zero */ + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 | + NEEDS_FIXUP, "beqz", "!0r,!1t!0N", 8), + ENCODING_MAP(kMipsBgez, 0x04010000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 | + NEEDS_FIXUP, "bgez", "!0r,!1t!0N", 8), + ENCODING_MAP(kMipsBgtz, 0x1C000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 | + NEEDS_FIXUP, "bgtz", "!0r,!1t!0N", 8), + ENCODING_MAP(kMipsBlez, 0x18000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 | + NEEDS_FIXUP, "blez", "!0r,!1t!0N", 8), + ENCODING_MAP(kMipsBltz, 0x04000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 | + NEEDS_FIXUP, "bltz", "!0r,!1t!0N", 8), + ENCODING_MAP(kMipsBnez, 0x14000000, /* same as bne below with t = $zero */ + kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + 
kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 | + NEEDS_FIXUP, "bnez", "!0r,!1t!0N", 8), + ENCODING_MAP(kMipsBne, 0x14000000, + kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_USE01 | + NEEDS_FIXUP, "bne", "!0r,!1r,!2t!0N", 8), + ENCODING_MAP(kMipsDiv, 0x0000001a, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtBitBlt, 25, 21, + kFmtBitBlt, 20, 16, IS_QUAD_OP | REG_DEF01 | REG_USE23, + "div", "!2r,!3r", 4), +#if __mips_isa_rev>=2 + ENCODING_MAP(kMipsExt, 0x7c000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 10, 6, + kFmtBitBlt, 15, 11, IS_QUAD_OP | REG_DEF0 | REG_USE1, + "ext", "!0r,!1r,!2d,!3D", 4), +#endif + ENCODING_MAP(kMipsJal, 0x0c000000, + kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, + "jal", "!0T(!0E)!0N", 8), + ENCODING_MAP(kMipsJalr, 0x00000009, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF0_USE1, + "jalr", "!0r,!1r!0N", 8), + ENCODING_MAP(kMipsJr, 0x00000008, + kFmtBitBlt, 25, 21, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_USE0 | + NEEDS_FIXUP, "jr", "!0r!0N", 8), + ENCODING_MAP(kMipsLahi, 0x3C000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "lahi/lui", "!0r,0x!1h(!1d)", 4), + ENCODING_MAP(kMipsLalo, 0x34000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "lalo/ori", "!0r,!1r,0x!2h(!2d)", 4), + ENCODING_MAP(kMipsLui, 0x3C000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, + "lui", "!0r,0x!1h(!1d)", 4), + ENCODING_MAP(kMipsLb, 0x80000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lb", "!0r,!1d(!2r)", 4), + ENCODING_MAP(kMipsLbu, 0x90000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lbu", "!0r,!1d(!2r)", 4), + ENCODING_MAP(kMipsLh, 0x84000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lh", "!0r,!1d(!2r)", 4), + ENCODING_MAP(kMipsLhu, 0x94000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lhu", "!0r,!1d(!2r)", 4), + ENCODING_MAP(kMipsLw, 0x8C000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lw", "!0r,!1d(!2r)", 4), + ENCODING_MAP(kMipsMfhi, 0x00000010, + kFmtBitBlt, 15, 11, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mfhi", "!0r", 4), + ENCODING_MAP(kMipsMflo, 0x00000012, + kFmtBitBlt, 15, 11, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mflo", "!0r", 4), + ENCODING_MAP(kMipsMove, 0x00000025, /* or using zero reg */ + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "move", "!0r,!1r", 4), + ENCODING_MAP(kMipsMovz, 0x0000000a, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "movz", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsMul, 0x70000002, + kFmtBitBlt, 15, 
11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mul", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsNop, 0x00000000, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND, + "nop", ";", 4), + ENCODING_MAP(kMipsNor, 0x00000027, /* used for "not" too */ + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "nor", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsOr, 0x00000025, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "or", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsOri, 0x34000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "ori", "!0r,!1r,0x!2h(!2d)", 4), + ENCODING_MAP(kMipsPref, 0xCC000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE2, + "pref", "!0d,!1d(!2r)", 4), + ENCODING_MAP(kMipsSb, 0xA0000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "sb", "!0r,!1d(!2r)", 4), +#if __mips_isa_rev>=2 + ENCODING_MAP(kMipsSeb, 0x7c000420, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "seb", "!0r,!1r", 4), + ENCODING_MAP(kMipsSeh, 0x7c000620, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "seh", "!0r,!1r", 4), +#endif + ENCODING_MAP(kMipsSh, 0xA4000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "sh", "!0r,!1d(!2r)", 4), + ENCODING_MAP(kMipsSll, 0x00000000, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "sll", "!0r,!1r,0x!2h(!2d)", 4), + ENCODING_MAP(kMipsSllv, 0x00000004, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sllv", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsSlt, 0x0000002a, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "slt", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsSlti, 0x28000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "slti", "!0r,!1r,0x!2h(!2d)", 4), + ENCODING_MAP(kMipsSltu, 0x0000002b, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sltu", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsSra, 0x00000003, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "sra", "!0r,!1r,0x!2h(!2d)", 4), + ENCODING_MAP(kMipsSrav, 0x00000007, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "srav", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsSrl, 0x00000002, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 10, 6, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "srl", "!0r,!1r,0x!2h(!2d)", 4), + ENCODING_MAP(kMipsSrlv, 0x00000006, + kFmtBitBlt, 15, 11, kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "srlv", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsSubu, 0x00000023, /* used for "neg" too */ + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 
21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "subu", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsSw, 0xAC000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "sw", "!0r,!1d(!2r)", 4), + ENCODING_MAP(kMipsXor, 0x00000026, + kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "xor", "!0r,!1r,!2r", 4), + ENCODING_MAP(kMipsXori, 0x38000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 25, 21, kFmtBitBlt, 15, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, + "xori", "!0r,!1r,0x!2h(!2d)", 4), + ENCODING_MAP(kMipsFadds, 0x46000000, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "add.s", "!0s,!1s,!2s", 4), + ENCODING_MAP(kMipsFsubs, 0x46000001, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sub.s", "!0s,!1s,!2s", 4), + ENCODING_MAP(kMipsFmuls, 0x46000002, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mul.s", "!0s,!1s,!2s", 4), + ENCODING_MAP(kMipsFdivs, 0x46000003, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtSfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "div.s", "!0s,!1s,!2s", 4), + ENCODING_MAP(kMipsFaddd, 0x46200000, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "add.d", "!0S,!1S,!2S", 4), + ENCODING_MAP(kMipsFsubd, 0x46200001, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sub.d", "!0S,!1S,!2S", 4), + ENCODING_MAP(kMipsFmuld, 0x46200002, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "mul.d", "!0S,!1S,!2S", 4), + ENCODING_MAP(kMipsFdivd, 0x46200003, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtDfp, 20, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "div.d", "!0S,!1S,!2S", 4), + ENCODING_MAP(kMipsFcvtsd, 0x46200020, + kFmtSfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.s.d", "!0s,!1S", 4), + ENCODING_MAP(kMipsFcvtsw, 0x46800020, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.s.w", "!0s,!1s", 4), + ENCODING_MAP(kMipsFcvtds, 0x46000021, + kFmtDfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.d.s", "!0S,!1s", 4), + ENCODING_MAP(kMipsFcvtdw, 0x46800021, + kFmtDfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.d.w", "!0S,!1s", 4), + ENCODING_MAP(kMipsFcvtws, 0x46000024, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.w.s", "!0s,!1s", 4), + ENCODING_MAP(kMipsFcvtwd, 0x46200024, + kFmtSfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "cvt.w.d", "!0s,!1S", 4), + ENCODING_MAP(kMipsFmovs, 0x46000006, + kFmtSfp, 10, 6, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov.s", "!0s,!1s", 4), + ENCODING_MAP(kMipsFmovd, 0x46200006, + kFmtDfp, 10, 6, kFmtDfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mov.d", "!0S,!1S", 4), + ENCODING_MAP(kMipsFlwc1, 0xC4000000, + kFmtSfp, 20, 16, kFmtBitBlt, 15, 0, 
kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "lwc1", "!0s,!1d(!2r)", 4), + ENCODING_MAP(kMipsFldc1, 0xD4000000, + kFmtDfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE2 | IS_LOAD, + "ldc1", "!0S,!1d(!2r)", 4), + ENCODING_MAP(kMipsFswc1, 0xE4000000, + kFmtSfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "swc1", "!0s,!1d(!2r)", 4), + ENCODING_MAP(kMipsFsdc1, 0xF4000000, + kFmtDfp, 20, 16, kFmtBitBlt, 15, 0, kFmtBitBlt, 25, 21, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE02 | IS_STORE, + "sdc1", "!0S,!1d(!2r)", 4), + ENCODING_MAP(kMipsMfc1, 0x44000000, + kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, + "mfc1", "!0r,!1s", 4), + ENCODING_MAP(kMipsMtc1, 0x44800000, + kFmtBitBlt, 20, 16, kFmtSfp, 15, 11, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1, + "mtc1", "!0r,!1s", 4), + ENCODING_MAP(kMipsDelta, 0x27e00000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, 15, 0, + kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0 | REG_USE_LR | + NEEDS_FIXUP, "addiu", "!0r,ra,0x!1h(!1d)", 4), + ENCODING_MAP(kMipsDeltaHi, 0x3C000000, + kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0 | NEEDS_FIXUP, + "lui", "!0r,0x!1h(!1d)", 4), + ENCODING_MAP(kMipsDeltaLo, 0x34000000, + kFmtBlt5_2, 16, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0_USE0 | NEEDS_FIXUP, + "ori", "!0r,!0r,0x!1h(!1d)", 4), + ENCODING_MAP(kMipsCurrPC, 0x04110001, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH | REG_DEF_LR, + "addiu", "ra,pc,8", 4), + ENCODING_MAP(kMipsSync, 0x0000000f, + kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_UNARY_OP, + "sync", ";", 4), + ENCODING_MAP(kMipsUndefined, 0x64000000, + kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, NO_OPERAND, + "undefined", "", 4), +}; + + +/* + * Convert a short-form branch to long form. Hopefully, this won't happen + * very often because the PIC sequence is especially unfortunate. + * + * Orig conditional branch + * ----------------------- + * beq rs,rt,target + * + * Long conditional branch + * ----------------------- + * bne rs,rt,hop + * bal .+8 ; r_RA <- anchor + * lui r_AT, ((target-anchor) >> 16) + * anchor: + * ori r_AT, r_AT, ((target-anchor) & 0xffff) + * addu r_AT, r_AT, r_RA + * jr r_AT + * hop: + * + * Orig unconditional branch + * ------------------------- + * b target + * + * Long unconditional branch + * ----------------------- + * bal .+8 ; r_RA <- anchor + * lui r_AT, ((target-anchor) >> 16) + * anchor: + * ori r_AT, r_AT, ((target-anchor) & 0xffff) + * addu r_AT, r_AT, r_RA + * jr r_AT + * + * + * NOTE: An out-of-range bal isn't supported because it should + * never happen with the current PIC model. 
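+ *
+ * (For reference: the short branch forms encode a signed 16-bit word
+ * displacement, so they only reach roughly +/-128KB from the instruction
+ * that follows. AssembleInstructions below falls back to this expansion
+ * whenever the byte delta is outside the [-131069, 131068] range it checks.)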
+ */ +void MipsMir2Lir::ConvertShortToLongBranch(LIR* lir) +{ + // For conditional branches we'll need to reverse the sense + bool unconditional = false; + int opcode = lir->opcode; + int dalvik_offset = lir->dalvik_offset; + switch (opcode) { + case kMipsBal: + LOG(FATAL) << "long branch and link unsupported"; + case kMipsB: + unconditional = true; + break; + case kMipsBeq: opcode = kMipsBne; break; + case kMipsBne: opcode = kMipsBeq; break; + case kMipsBeqz: opcode = kMipsBnez; break; + case kMipsBgez: opcode = kMipsBltz; break; + case kMipsBgtz: opcode = kMipsBlez; break; + case kMipsBlez: opcode = kMipsBgtz; break; + case kMipsBltz: opcode = kMipsBgez; break; + case kMipsBnez: opcode = kMipsBeqz; break; + default: + LOG(FATAL) << "Unexpected branch kind " << opcode; + } + LIR* hop_target = NULL; + if (!unconditional) { + hop_target = RawLIR(dalvik_offset, kPseudoTargetLabel); + LIR* hop_branch = RawLIR(dalvik_offset, opcode, lir->operands[0], + lir->operands[1], 0, 0, 0, hop_target); + InsertLIRBefore(lir, hop_branch); + } + LIR* curr_pc = RawLIR(dalvik_offset, kMipsCurrPC); + InsertLIRBefore(lir, curr_pc); + LIR* anchor = RawLIR(dalvik_offset, kPseudoTargetLabel); + LIR* delta_hi = RawLIR(dalvik_offset, kMipsDeltaHi, r_AT, 0, + reinterpret_cast<uintptr_t>(anchor), 0, 0, lir->target); + InsertLIRBefore(lir, delta_hi); + InsertLIRBefore(lir, anchor); + LIR* delta_lo = RawLIR(dalvik_offset, kMipsDeltaLo, r_AT, 0, + reinterpret_cast<uintptr_t>(anchor), 0, 0, lir->target); + InsertLIRBefore(lir, delta_lo); + LIR* addu = RawLIR(dalvik_offset, kMipsAddu, r_AT, r_AT, r_RA); + InsertLIRBefore(lir, addu); + LIR* jr = RawLIR(dalvik_offset, kMipsJr, r_AT); + InsertLIRBefore(lir, jr); + if (!unconditional) { + InsertLIRBefore(lir, hop_target); + } + lir->flags.is_nop = true; +} + +/* + * Assemble the LIR into binary instruction format. Note that we may + * discover that pc-relative displacements may not fit the selected + * instruction. In those cases we will try to substitute a new code + * sequence or request that the trace be shortened and retried. + */ +AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) +{ + LIR *lir; + AssemblerStatus res = kSuccess; // Assume success + + for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + if (lir->opcode < 0) { + continue; + } + + + if (lir->flags.is_nop) { + continue; + } + + if (lir->flags.pcRelFixup) { + if (lir->opcode == kMipsDelta) { + /* + * The "Delta" pseudo-ops load the difference between + * two pc-relative locations into a the target register + * found in operands[0]. The delta is determined by + * (label2 - label1), where label1 is a standard + * kPseudoTargetLabel and is stored in operands[2]. + * If operands[3] is null, then label2 is a kPseudoTargetLabel + * and is found in lir->target. If operands[3] is non-NULL, + * then it is a Switch/Data table. + */ + int offset1 = (reinterpret_cast<LIR*>(lir->operands[2]))->offset; + SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + int offset2 = tab_rec ? 
tab_rec->offset : lir->target->offset; + int delta = offset2 - offset1; + if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) { + // Fits + lir->operands[1] = delta; + } else { + // Doesn't fit - must expand to kMipsDelta[Hi|Lo] pair + LIR *new_delta_hi = + RawLIR(lir->dalvik_offset, kMipsDeltaHi, + lir->operands[0], 0, lir->operands[2], + lir->operands[3], 0, lir->target); + InsertLIRBefore(lir, new_delta_hi); + LIR *new_delta_lo = + RawLIR(lir->dalvik_offset, kMipsDeltaLo, + lir->operands[0], 0, lir->operands[2], + lir->operands[3], 0, lir->target); + InsertLIRBefore(lir, new_delta_lo); + LIR *new_addu = + RawLIR(lir->dalvik_offset, kMipsAddu, + lir->operands[0], lir->operands[0], r_RA); + InsertLIRBefore(lir, new_addu); + lir->flags.is_nop = true; + res = kRetryAll; + } + } else if (lir->opcode == kMipsDeltaLo) { + int offset1 = (reinterpret_cast<LIR*>(lir->operands[2]))->offset; + SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + int offset2 = tab_rec ? tab_rec->offset : lir->target->offset; + int delta = offset2 - offset1; + lir->operands[1] = delta & 0xffff; + } else if (lir->opcode == kMipsDeltaHi) { + int offset1 = (reinterpret_cast<LIR*>(lir->operands[2]))->offset; + SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + int offset2 = tab_rec ? tab_rec->offset : lir->target->offset; + int delta = offset2 - offset1; + lir->operands[1] = (delta >> 16) & 0xffff; + } else if (lir->opcode == kMipsB || lir->opcode == kMipsBal) { + LIR *target_lir = lir->target; + uintptr_t pc = lir->offset + 4; + uintptr_t target = target_lir->offset; + int delta = target - pc; + if (delta & 0x3) { + LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; + } + if (delta > 131068 || delta < -131069) { + res = kRetryAll; + ConvertShortToLongBranch(lir); + } else { + lir->operands[0] = delta >> 2; + } + } else if (lir->opcode >= kMipsBeqz && lir->opcode <= kMipsBnez) { + LIR *target_lir = lir->target; + uintptr_t pc = lir->offset + 4; + uintptr_t target = target_lir->offset; + int delta = target - pc; + if (delta & 0x3) { + LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; + } + if (delta > 131068 || delta < -131069) { + res = kRetryAll; + ConvertShortToLongBranch(lir); + } else { + lir->operands[1] = delta >> 2; + } + } else if (lir->opcode == kMipsBeq || lir->opcode == kMipsBne) { + LIR *target_lir = lir->target; + uintptr_t pc = lir->offset + 4; + uintptr_t target = target_lir->offset; + int delta = target - pc; + if (delta & 0x3) { + LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; + } + if (delta > 131068 || delta < -131069) { + res = kRetryAll; + ConvertShortToLongBranch(lir); + } else { + lir->operands[2] = delta >> 2; + } + } else if (lir->opcode == kMipsJal) { + uintptr_t cur_pc = (start_addr + lir->offset + 4) & ~3; + uintptr_t target = lir->operands[0]; + /* ensure PC-region branch can be used */ + DCHECK_EQ((cur_pc & 0xF0000000), (target & 0xF0000000)); + if (target & 0x3) { + LOG(FATAL) << "Jump target not multiple of 4: " << target; + } + lir->operands[0] = target >> 2; + } else if (lir->opcode == kMipsLahi) { /* ld address hi (via lui) */ + LIR *target_lir = lir->target; + uintptr_t target = start_addr + target_lir->offset; + lir->operands[1] = target >> 16; + } else if (lir->opcode == kMipsLalo) { /* ld address lo (via ori) */ + LIR *target_lir = lir->target; + uintptr_t target = start_addr + target_lir->offset; + lir->operands[2] = lir->operands[2] + target; + } + } + + /* + * If one of the pc-relative 
instructions expanded we'll have + * to make another pass. Don't bother to fully assemble the + * instruction. + */ + if (res != kSuccess) { + continue; + } + const MipsEncodingMap *encoder = &EncodingMap[lir->opcode]; + uint32_t bits = encoder->skeleton; + int i; + for (i = 0; i < 4; i++) { + uint32_t operand; + uint32_t value; + operand = lir->operands[i]; + switch (encoder->field_loc[i].kind) { + case kFmtUnused: + break; + case kFmtBitBlt: + if (encoder->field_loc[i].start == 0 && encoder->field_loc[i].end == 31) { + value = operand; + } else { + value = (operand << encoder->field_loc[i].start) & + ((1 << (encoder->field_loc[i].end + 1)) - 1); + } + bits |= value; + break; + case kFmtBlt5_2: + value = (operand & 0x1f); + bits |= (value << encoder->field_loc[i].start); + bits |= (value << encoder->field_loc[i].end); + break; + case kFmtDfp: { + DCHECK(MIPS_DOUBLEREG(operand)); + DCHECK_EQ((operand & 0x1), 0U); + value = ((operand & MIPS_FP_REG_MASK) << encoder->field_loc[i].start) & + ((1 << (encoder->field_loc[i].end + 1)) - 1); + bits |= value; + break; + } + case kFmtSfp: + DCHECK(MIPS_SINGLEREG(operand)); + value = ((operand & MIPS_FP_REG_MASK) << encoder->field_loc[i].start) & + ((1 << (encoder->field_loc[i].end + 1)) - 1); + bits |= value; + break; + default: + LOG(FATAL) << "Bad encoder format: " << encoder->field_loc[i].kind; + } + } + // We only support little-endian MIPS. + code_buffer_.push_back(bits & 0xff); + code_buffer_.push_back((bits >> 8) & 0xff); + code_buffer_.push_back((bits >> 16) & 0xff); + code_buffer_.push_back((bits >> 24) & 0xff); + // TUNING: replace with proper delay slot handling + if (encoder->size == 8) { + const MipsEncodingMap *encoder = &EncodingMap[kMipsNop]; + uint32_t bits = encoder->skeleton; + code_buffer_.push_back(bits & 0xff); + code_buffer_.push_back((bits >> 8) & 0xff); + code_buffer_.push_back((bits >> 16) & 0xff); + code_buffer_.push_back((bits >> 24) & 0xff); + } + } + return res; +} + +int MipsMir2Lir::GetInsnSize(LIR* lir) +{ + return EncodingMap[lir->opcode].size; +} + +} // namespace art diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc new file mode 100644 index 0000000000..eb0302e80f --- /dev/null +++ b/compiler/dex/quick/mips/call_mips.cc @@ -0,0 +1,392 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains codegen for the Mips ISA */ + +#include "codegen_mips.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "mips_lir.h" +#include "oat/runtime/oat_support_entrypoints.h" + +namespace art { + +void MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, + SpecialCaseHandler special_case) +{ + // TODO +} + +/* + * The lack of pc-relative loads on Mips presents somewhat of a challenge + * for our PIC switch table strategy. To materialize the current location + * we'll do a dummy JAL and reference our tables using r_RA as the + * base register. 
Note that r_RA will be used both as the base to + * locate the switch table data and as the reference base for the switch + * target offsets stored in the table. We'll use a special pseudo-instruction + * to represent the jal and trigger the construction of the + * switch table offsets (which will happen after final assembly and all + * labels are fixed). + * + * The test loop will look something like: + * + * ori rEnd, r_ZERO, #table_size ; size in bytes + * jal BaseLabel ; stores "return address" (BaseLabel) in r_RA + * nop ; opportunistically fill + * BaseLabel: + * addiu rBase, r_RA, <table> - <BaseLabel> ; table relative to BaseLabel + addu rEnd, rEnd, rBase ; end of table + * lw r_val, [rSP, v_reg_off] ; Test Value + * loop: + * beq rBase, rEnd, done + * lw r_key, 0(rBase) + * addu rBase, 8 + * bne r_val, r_key, loop + * lw r_disp, -4(rBase) + * addu r_RA, r_disp + * jr r_RA + * done: + * + */ +void MipsMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) +{ + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpSparseSwitchTable(table); + } + // Add the table to the list - we'll process it later + SwitchTable *tab_rec = + static_cast<SwitchTable*>(arena_->NewMem(sizeof(SwitchTable), true, + ArenaAllocator::kAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + int elements = table[1]; + tab_rec->targets = + static_cast<LIR**>(arena_->NewMem(elements * sizeof(LIR*), true, ArenaAllocator::kAllocLIR)); + switch_tables_.Insert(tab_rec); + + // The table is composed of 8-byte key/disp pairs + int byte_size = elements * 8; + + int size_hi = byte_size >> 16; + int size_lo = byte_size & 0xffff; + + int rEnd = AllocTemp(); + if (size_hi) { + NewLIR2(kMipsLui, rEnd, size_hi); + } + // Must prevent code motion for the curr pc pair + GenBarrier(); // Scheduling barrier + NewLIR0(kMipsCurrPC); // Really a jal to .+8 + // Now, fill the branch delay slot + if (size_hi) { + NewLIR3(kMipsOri, rEnd, rEnd, size_lo); + } else { + NewLIR3(kMipsOri, rEnd, r_ZERO, size_lo); + } + GenBarrier(); // Scheduling barrier + + // Construct BaseLabel and set up table base register + LIR* base_label = NewLIR0(kPseudoTargetLabel); + // Remember base label so offsets can be computed later + tab_rec->anchor = base_label; + int rBase = AllocTemp(); + NewLIR4(kMipsDelta, rBase, 0, reinterpret_cast<uintptr_t>(base_label), + reinterpret_cast<uintptr_t>(tab_rec)); + OpRegRegReg(kOpAdd, rEnd, rEnd, rBase); + + // Grab switch test value + rl_src = LoadValue(rl_src, kCoreReg); + + // Test loop + int r_key = AllocTemp(); + LIR* loop_label = NewLIR0(kPseudoTargetLabel); + LIR* exit_branch = OpCmpBranch(kCondEq, rBase, rEnd, NULL); + LoadWordDisp(rBase, 0, r_key); + OpRegImm(kOpAdd, rBase, 8); + OpCmpBranch(kCondNe, rl_src.low_reg, r_key, loop_label); + int r_disp = AllocTemp(); + LoadWordDisp(rBase, -4, r_disp); + OpRegRegReg(kOpAdd, r_RA, r_RA, r_disp); + OpReg(kOpBx, r_RA); + + // Loop exit + LIR* exit_label = NewLIR0(kPseudoTargetLabel); + exit_branch->target = exit_label; +} + +/* + * Code pattern will look something like: + * + * lw r_val + * jal BaseLabel ; stores "return address" (BaseLabel) in r_RA + * nop ; opportunistically fill + * [subiu r_val, bias] ; Remove bias if low_val != 0 + * bound check -> done + * lw r_disp, [r_RA, r_val] + * addu r_RA, r_disp + * jr r_RA + * done: + */ +void MipsMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) +{ + const uint16_t* table = cu_->insns + 
current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpPackedSwitchTable(table); + } + // Add the table to the list - we'll process it later + SwitchTable *tab_rec = + static_cast<SwitchTable*>(arena_->NewMem(sizeof(SwitchTable), true, + ArenaAllocator::kAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + int size = table[1]; + tab_rec->targets = static_cast<LIR**>(arena_->NewMem(size * sizeof(LIR*), true, + ArenaAllocator::kAllocLIR)); + switch_tables_.Insert(tab_rec); + + // Get the switch value + rl_src = LoadValue(rl_src, kCoreReg); + + // Prepare the bias. If too big, handle 1st stage here + int low_key = s4FromSwitchData(&table[2]); + bool large_bias = false; + int r_key; + if (low_key == 0) { + r_key = rl_src.low_reg; + } else if ((low_key & 0xffff) != low_key) { + r_key = AllocTemp(); + LoadConstant(r_key, low_key); + large_bias = true; + } else { + r_key = AllocTemp(); + } + + // Must prevent code motion for the curr pc pair + GenBarrier(); + NewLIR0(kMipsCurrPC); // Really a jal to .+8 + // Now, fill the branch delay slot with bias strip + if (low_key == 0) { + NewLIR0(kMipsNop); + } else { + if (large_bias) { + OpRegRegReg(kOpSub, r_key, rl_src.low_reg, r_key); + } else { + OpRegRegImm(kOpSub, r_key, rl_src.low_reg, low_key); + } + } + GenBarrier(); // Scheduling barrier + + // Construct BaseLabel and set up table base register + LIR* base_label = NewLIR0(kPseudoTargetLabel); + // Remember base label so offsets can be computed later + tab_rec->anchor = base_label; + + // Bounds check - if < 0 or >= size continue following switch + LIR* branch_over = OpCmpImmBranch(kCondHi, r_key, size-1, NULL); + + // Materialize the table base pointer + int rBase = AllocTemp(); + NewLIR4(kMipsDelta, rBase, 0, reinterpret_cast<uintptr_t>(base_label), + reinterpret_cast<uintptr_t>(tab_rec)); + + // Load the displacement from the switch table + int r_disp = AllocTemp(); + LoadBaseIndexed(rBase, r_key, r_disp, 2, kWord); + + // Add to r_AP and go + OpRegRegReg(kOpAdd, r_RA, r_RA, r_disp); + OpReg(kOpBx, r_RA); + + /* branch_over target here */ + LIR* target = NewLIR0(kPseudoTargetLabel); + branch_over->target = target; +} + +/* + * Array data table format: + * ushort ident = 0x0300 magic value + * ushort width width of each element in the table + * uint size number of elements in the table + * ubyte data[size*width] table of data values (may contain a single-byte + * padding at the end) + * + * Total size is 4+(width * size + 1)/2 16-bit code units. 
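+ *
+ * (Worked example: width=1, size=5 gives (1*5+1)/2 = 3 data code units,
+ * so the table occupies 4+3 = 7 16-bit code units in total.)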
+ */ +void MipsMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) +{ + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + // Add the table to the list - we'll process it later + FillArrayData *tab_rec = + reinterpret_cast<FillArrayData*>(arena_->NewMem(sizeof(FillArrayData), true, + ArenaAllocator::kAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + uint16_t width = tab_rec->table[1]; + uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16); + tab_rec->size = (size * width) + 8; + + fill_array_data_.Insert(tab_rec); + + // Making a call - use explicit registers + FlushAllRegs(); /* Everything to home location */ + LockCallTemps(); + LoadValueDirectFixed(rl_src, rMIPS_ARG0); + + // Must prevent code motion for the curr pc pair + GenBarrier(); + NewLIR0(kMipsCurrPC); // Really a jal to .+8 + // Now, fill the branch delay slot with the helper load + int r_tgt = LoadHelper(ENTRYPOINT_OFFSET(pHandleFillArrayDataFromCode)); + GenBarrier(); // Scheduling barrier + + // Construct BaseLabel and set up table base register + LIR* base_label = NewLIR0(kPseudoTargetLabel); + + // Materialize a pointer to the fill data image + NewLIR4(kMipsDelta, rMIPS_ARG1, 0, reinterpret_cast<uintptr_t>(base_label), + reinterpret_cast<uintptr_t>(tab_rec)); + + // And go... + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, r_tgt); // ( array*, fill_data* ) + MarkSafepointPC(call_inst); +} + +/* + * TODO: implement fast path to short-circuit thin-lock case + */ +void MipsMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) +{ + FlushAllRegs(); + LoadValueDirectFixed(rl_src, rMIPS_ARG0); // Get obj + LockCallTemps(); // Prepare for explicit register usage + GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags); + // Go expensive route - artLockObjectFromCode(self, obj); + int r_tgt = LoadHelper(ENTRYPOINT_OFFSET(pLockObjectFromCode)); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, r_tgt); + MarkSafepointPC(call_inst); +} + +/* + * TODO: implement fast path to short-circuit thin-lock case + */ +void MipsMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) +{ + FlushAllRegs(); + LoadValueDirectFixed(rl_src, rMIPS_ARG0); // Get obj + LockCallTemps(); // Prepare for explicit register usage + GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags); + // Go expensive route - UnlockObjectFromCode(obj); + int r_tgt = LoadHelper(ENTRYPOINT_OFFSET(pUnlockObjectFromCode)); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, r_tgt); + MarkSafepointPC(call_inst); +} + +void MipsMir2Lir::GenMoveException(RegLocation rl_dest) +{ + int ex_offset = Thread::ExceptionOffset().Int32Value(); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + int reset_reg = AllocTemp(); + LoadWordDisp(rMIPS_SELF, ex_offset, rl_result.low_reg); + LoadConstant(reset_reg, 0); + StoreWordDisp(rMIPS_SELF, ex_offset, reset_reg); + FreeTemp(reset_reg); + StoreValue(rl_dest, rl_result); +} + +/* + * Mark garbage collection card. Skip if the value we're storing is null. 
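+ * (As implemented below, the card index is tgt_addr >> kCardShift and the
+ * mark is a single unsigned-byte store at card_table_base + index.)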
+ */ +void MipsMir2Lir::MarkGCCard(int val_reg, int tgt_addr_reg) +{ + int reg_card_base = AllocTemp(); + int reg_card_no = AllocTemp(); + LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL); + LoadWordDisp(rMIPS_SELF, Thread::CardTableOffset().Int32Value(), reg_card_base); + OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift); + StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, + kUnsignedByte); + LIR* target = NewLIR0(kPseudoTargetLabel); + branch_over->target = target; + FreeTemp(reg_card_base); + FreeTemp(reg_card_no); +} +void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) +{ + int spill_count = num_core_spills_ + num_fp_spills_; + /* + * On entry, rMIPS_ARG0, rMIPS_ARG1, rMIPS_ARG2 & rMIPS_ARG3 are live. Let the register + * allocation mechanism know so it doesn't try to use any of them when + * expanding the frame or flushing. This leaves the utility + * code with a single temp: r12. This should be enough. + */ + LockTemp(rMIPS_ARG0); + LockTemp(rMIPS_ARG1); + LockTemp(rMIPS_ARG2); + LockTemp(rMIPS_ARG3); + + /* + * We can safely skip the stack overflow check if we're + * a leaf *and* our frame size < fudge factor. + */ + bool skip_overflow_check = (mir_graph_->MethodIsLeaf() && + (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes)); + NewLIR0(kPseudoMethodEntry); + int check_reg = AllocTemp(); + int new_sp = AllocTemp(); + if (!skip_overflow_check) { + /* Load stack limit */ + LoadWordDisp(rMIPS_SELF, Thread::StackEndOffset().Int32Value(), check_reg); + } + /* Spill core callee saves */ + SpillCoreRegs(); + /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */ + DCHECK_EQ(num_fp_spills_, 0); + if (!skip_overflow_check) { + OpRegRegImm(kOpSub, new_sp, rMIPS_SP, frame_size_ - (spill_count * 4)); + GenRegRegCheck(kCondCc, new_sp, check_reg, kThrowStackOverflow); + OpRegCopy(rMIPS_SP, new_sp); // Establish stack + } else { + OpRegImm(kOpSub, rMIPS_SP, frame_size_ - (spill_count * 4)); + } + + FlushIns(ArgLocs, rl_method); + + FreeTemp(rMIPS_ARG0); + FreeTemp(rMIPS_ARG1); + FreeTemp(rMIPS_ARG2); + FreeTemp(rMIPS_ARG3); +} + +void MipsMir2Lir::GenExitSequence() +{ + /* + * In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't + * allocated by the register utilities as temps. + */ + LockTemp(rMIPS_RET0); + LockTemp(rMIPS_RET1); + + NewLIR0(kPseudoMethodExit); + UnSpillCoreRegs(); + OpReg(kOpBx, r_RA); +} + +} // namespace art diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h new file mode 100644 index 0000000000..9723b899a9 --- /dev/null +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_SRC_DEX_QUICK_CODEGEN_MIPS_CODEGENMIPS_H_ +#define ART_SRC_DEX_QUICK_CODEGEN_MIPS_CODEGENMIPS_H_ + +#include "dex/compiler_internals.h" +#include "mips_lir.h" + +namespace art { + +class MipsMir2Lir : public Mir2Lir { + public: + + MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); + + // Required for target - codegen utilities. + bool SmallLiteralDivide(Instruction::Code dalvik_opcode, RegLocation rl_src, + RegLocation rl_dest, int lit); + int LoadHelper(int offset); + LIR* LoadBaseDisp(int rBase, int displacement, int r_dest, OpSize size, int s_reg); + LIR* LoadBaseDispWide(int rBase, int displacement, int r_dest_lo, int r_dest_hi, + int s_reg); + LIR* LoadBaseIndexed(int rBase, int r_index, int r_dest, int scale, OpSize size); + LIR* LoadBaseIndexedDisp(int rBase, int r_index, int scale, int displacement, + int r_dest, int r_dest_hi, OpSize size, int s_reg); + LIR* LoadConstantNoClobber(int r_dest, int value); + LIR* LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value); + LIR* StoreBaseDisp(int rBase, int displacement, int r_src, OpSize size); + LIR* StoreBaseDispWide(int rBase, int displacement, int r_src_lo, int r_src_hi); + LIR* StoreBaseIndexed(int rBase, int r_index, int r_src, int scale, OpSize size); + LIR* StoreBaseIndexedDisp(int rBase, int r_index, int scale, int displacement, + int r_src, int r_src_hi, OpSize size, int s_reg); + void MarkGCCard(int val_reg, int tgt_addr_reg); + + // Required for target - register utilities. + bool IsFpReg(int reg); + bool SameRegType(int reg1, int reg2); + int AllocTypedTemp(bool fp_hint, int reg_class); + int AllocTypedTempPair(bool fp_hint, int reg_class); + int S2d(int low_reg, int high_reg); + int TargetReg(SpecialTargetRegister reg); + RegisterInfo* GetRegInfo(int reg); + RegLocation GetReturnAlt(); + RegLocation GetReturnWideAlt(); + RegLocation LocCReturn(); + RegLocation LocCReturnDouble(); + RegLocation LocCReturnFloat(); + RegLocation LocCReturnWide(); + uint32_t FpRegMask(); + uint64_t GetRegMaskCommon(int reg); + void AdjustSpillMask(); + void ClobberCalleeSave(); + void FlushReg(int reg); + void FlushRegWide(int reg1, int reg2); + void FreeCallTemps(); + void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free); + void LockCallTemps(); + void MarkPreservedSingle(int v_reg, int reg); + void CompilerInitializeRegAlloc(); + + // Required for target - miscellaneous. + AssemblerStatus AssembleInstructions(uintptr_t start_addr); + void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); + void SetupTargetResourceMasks(LIR* lir); + const char* GetTargetInstFmt(int opcode); + const char* GetTargetInstName(int opcode); + std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); + uint64_t GetPCUseDefEncoding(); + uint64_t GetTargetInstFlags(int opcode); + int GetInsnSize(LIR* lir); + bool IsUnconditionalBranch(LIR* lir); + + // Required for target - Dalvik-level generators. 
+ void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src, int scale); + void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_dest, int scale); + void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale); + void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift); + void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); + bool GenInlinedCas32(CallInfo* info, bool need_write_barrier); + bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); + bool GenInlinedSqrt(CallInfo* info); + void GenNegLong(RegLocation rl_dest, RegLocation rl_src); + void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset, + ThrowKind kind); + RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div); + RegLocation GenDivRemLit(RegLocation rl_dest, int reg_lo, int lit, bool is_div); + void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenDivZeroCheck(int reg_lo, int reg_hi); + void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); + void GenExitSequence(); + void GenFillArrayData(uint32_t table_offset, RegLocation rl_src); + void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); + void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); + void GenSelect(BasicBlock* bb, MIR* mir); + void GenMemBarrier(MemBarrierKind barrier_kind); + void GenMonitorEnter(int opt_flags, RegLocation rl_src); + void GenMonitorExit(int opt_flags, RegLocation rl_src); + void GenMoveException(RegLocation rl_dest); + void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, + int first_bit, int second_bit); + void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); + void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); + void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenSpecialCase(BasicBlock* bb, MIR* mir, SpecialCaseHandler special_case); + + // Required for target - single operation generators. 
+ LIR* OpUnconditionalBranch(LIR* target); + LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target); + LIR* OpCmpImmBranch(ConditionCode cond, int reg, int check_value, LIR* target); + LIR* OpCondBranch(ConditionCode cc, LIR* target); + LIR* OpDecAndBranch(ConditionCode c_code, int reg, LIR* target); + LIR* OpFpRegCopy(int r_dest, int r_src); + LIR* OpIT(ConditionCode cond, const char* guide); + LIR* OpMem(OpKind op, int rBase, int disp); + LIR* OpPcRelLoad(int reg, LIR* target); + LIR* OpReg(OpKind op, int r_dest_src); + LIR* OpRegCopy(int r_dest, int r_src); + LIR* OpRegCopyNoInsert(int r_dest, int r_src); + LIR* OpRegImm(OpKind op, int r_dest_src1, int value); + LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset); + LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2); + LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value); + LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2); + LIR* OpTestSuspend(LIR* target); + LIR* OpThreadMem(OpKind op, int thread_offset); + LIR* OpVldm(int rBase, int count); + LIR* OpVstm(int rBase, int count); + void OpLea(int rBase, int reg1, int reg2, int scale, int offset); + void OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, int src_hi); + void OpTlsCmp(int offset, int val); + + LIR* LoadBaseDispBody(int rBase, int displacement, int r_dest, int r_dest_hi, OpSize size, + int s_reg); + LIR* StoreBaseDispBody(int rBase, int displacement, int r_src, int r_src_hi, OpSize size); + void SpillCoreRegs(); + void UnSpillCoreRegs(); + static const MipsEncodingMap EncodingMap[kMipsLast]; + bool InexpensiveConstantInt(int32_t value); + bool InexpensiveConstantFloat(int32_t value); + bool InexpensiveConstantLong(int64_t value); + bool InexpensiveConstantDouble(int64_t value); + + private: + void ConvertShortToLongBranch(LIR* lir); + +}; + +} // namespace art + +#endif // ART_SRC_DEX_QUICK_CODEGEN_MIPS_CODEGENMIPS_H_ diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc new file mode 100644 index 0000000000..8581d5beb6 --- /dev/null +++ b/compiler/dex/quick/mips/fp_mips.cc @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "codegen_mips.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "mips_lir.h" +#include "oat/runtime/oat_support_entrypoints.h" + +namespace art { + +void MipsMir2Lir::GenArithOpFloat(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) +{ + int op = kMipsNop; + RegLocation rl_result; + + /* + * Don't attempt to optimize register usage since these opcodes call out to + * the handlers. 
+ */ + switch (opcode) { + case Instruction::ADD_FLOAT_2ADDR: + case Instruction::ADD_FLOAT: + op = kMipsFadds; + break; + case Instruction::SUB_FLOAT_2ADDR: + case Instruction::SUB_FLOAT: + op = kMipsFsubs; + break; + case Instruction::DIV_FLOAT_2ADDR: + case Instruction::DIV_FLOAT: + op = kMipsFdivs; + break; + case Instruction::MUL_FLOAT_2ADDR: + case Instruction::MUL_FLOAT: + op = kMipsFmuls; + break; + case Instruction::REM_FLOAT_2ADDR: + case Instruction::REM_FLOAT: + FlushAllRegs(); // Send everything to home location + CallRuntimeHelperRegLocationRegLocation(ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2, false); + rl_result = GetReturn(true); + StoreValue(rl_dest, rl_result); + return; + case Instruction::NEG_FLOAT: + GenNegFloat(rl_dest, rl_src1); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + rl_src1 = LoadValue(rl_src1, kFPReg); + rl_src2 = LoadValue(rl_src2, kFPReg); + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR3(op, rl_result.low_reg, rl_src1.low_reg, rl_src2.low_reg); + StoreValue(rl_dest, rl_result); +} + +void MipsMir2Lir::GenArithOpDouble(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) +{ + int op = kMipsNop; + RegLocation rl_result; + + switch (opcode) { + case Instruction::ADD_DOUBLE_2ADDR: + case Instruction::ADD_DOUBLE: + op = kMipsFaddd; + break; + case Instruction::SUB_DOUBLE_2ADDR: + case Instruction::SUB_DOUBLE: + op = kMipsFsubd; + break; + case Instruction::DIV_DOUBLE_2ADDR: + case Instruction::DIV_DOUBLE: + op = kMipsFdivd; + break; + case Instruction::MUL_DOUBLE_2ADDR: + case Instruction::MUL_DOUBLE: + op = kMipsFmuld; + break; + case Instruction::REM_DOUBLE_2ADDR: + case Instruction::REM_DOUBLE: + FlushAllRegs(); // Send everything to home location + CallRuntimeHelperRegLocationRegLocation(ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2, false); + rl_result = GetReturnWide(true); + StoreValueWide(rl_dest, rl_result); + return; + case Instruction::NEG_DOUBLE: + GenNegDouble(rl_dest, rl_src1); + return; + default: + LOG(FATAL) << "Unpexpected opcode: " << opcode; + } + rl_src1 = LoadValueWide(rl_src1, kFPReg); + DCHECK(rl_src1.wide); + rl_src2 = LoadValueWide(rl_src2, kFPReg); + DCHECK(rl_src2.wide); + rl_result = EvalLoc(rl_dest, kFPReg, true); + DCHECK(rl_dest.wide); + DCHECK(rl_result.wide); + NewLIR3(op, S2d(rl_result.low_reg, rl_result.high_reg), S2d(rl_src1.low_reg, rl_src1.high_reg), + S2d(rl_src2.low_reg, rl_src2.high_reg)); + StoreValueWide(rl_dest, rl_result); +} + +void MipsMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src) +{ + int op = kMipsNop; + int src_reg; + RegLocation rl_result; + switch (opcode) { + case Instruction::INT_TO_FLOAT: + op = kMipsFcvtsw; + break; + case Instruction::DOUBLE_TO_FLOAT: + op = kMipsFcvtsd; + break; + case Instruction::FLOAT_TO_DOUBLE: + op = kMipsFcvtds; + break; + case Instruction::INT_TO_DOUBLE: + op = kMipsFcvtdw; + break; + case Instruction::FLOAT_TO_INT: + GenConversionCall(ENTRYPOINT_OFFSET(pF2iz), rl_dest, rl_src); + return; + case Instruction::DOUBLE_TO_INT: + GenConversionCall(ENTRYPOINT_OFFSET(pD2iz), rl_dest, rl_src); + return; + case Instruction::LONG_TO_DOUBLE: + GenConversionCall(ENTRYPOINT_OFFSET(pL2d), rl_dest, rl_src); + return; + case Instruction::FLOAT_TO_LONG: + GenConversionCall(ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src); + return; + case Instruction::LONG_TO_FLOAT: + GenConversionCall(ENTRYPOINT_OFFSET(pL2f), rl_dest, rl_src); + return; + case Instruction::DOUBLE_TO_LONG: + 
GenConversionCall(ENTRYPOINT_OFFSET(pD2l), rl_dest, rl_src); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + if (rl_src.wide) { + rl_src = LoadValueWide(rl_src, kFPReg); + src_reg = S2d(rl_src.low_reg, rl_src.high_reg); + } else { + rl_src = LoadValue(rl_src, kFPReg); + src_reg = rl_src.low_reg; + } + if (rl_dest.wide) { + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(op, S2d(rl_result.low_reg, rl_result.high_reg), src_reg); + StoreValueWide(rl_dest, rl_result); + } else { + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(op, rl_result.low_reg, src_reg); + StoreValue(rl_dest, rl_result); + } +} + +void MipsMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) +{ + bool wide = true; + int offset = -1; // Make gcc happy. + + switch (opcode) { + case Instruction::CMPL_FLOAT: + offset = ENTRYPOINT_OFFSET(pCmplFloat); + wide = false; + break; + case Instruction::CMPG_FLOAT: + offset = ENTRYPOINT_OFFSET(pCmpgFloat); + wide = false; + break; + case Instruction::CMPL_DOUBLE: + offset = ENTRYPOINT_OFFSET(pCmplDouble); + break; + case Instruction::CMPG_DOUBLE: + offset = ENTRYPOINT_OFFSET(pCmpgDouble); + break; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + FlushAllRegs(); + LockCallTemps(); + if (wide) { + LoadValueDirectWideFixed(rl_src1, rMIPS_FARG0, rMIPS_FARG1); + LoadValueDirectWideFixed(rl_src2, rMIPS_FARG2, rMIPS_FARG3); + } else { + LoadValueDirectFixed(rl_src1, rMIPS_FARG0); + LoadValueDirectFixed(rl_src2, rMIPS_FARG2); + } + int r_tgt = LoadHelper(offset); + // NOTE: not a safepoint + OpReg(kOpBlx, r_tgt); + RegLocation rl_result = GetReturn(false); + StoreValue(rl_dest, rl_result); +} + +void MipsMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, + bool gt_bias, bool is_double) +{ + UNIMPLEMENTED(FATAL) << "Need codegen for fused fp cmp branch"; +} + +void MipsMir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) +{ + RegLocation rl_result; + rl_src = LoadValue(rl_src, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegRegImm(kOpAdd, rl_result.low_reg, rl_src.low_reg, 0x80000000); + StoreValue(rl_dest, rl_result); +} + +void MipsMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) +{ + RegLocation rl_result; + rl_src = LoadValueWide(rl_src, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegRegImm(kOpAdd, rl_result.high_reg, rl_src.high_reg, 0x80000000); + OpRegCopy(rl_result.low_reg, rl_src.low_reg); + StoreValueWide(rl_dest, rl_result); +} + +bool MipsMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) +{ + // TODO: need Mips implementation + return false; +} + +} // namespace art diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc new file mode 100644 index 0000000000..8bfc4e1f91 --- /dev/null +++ b/compiler/dex/quick/mips/int_mips.cc @@ -0,0 +1,659 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* This file contains codegen for the Mips ISA */ + +#include "codegen_mips.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "mips_lir.h" +#include "mirror/array.h" +#include "oat/runtime/oat_support_entrypoints.h" + +namespace art { + +/* + * Compare two 64-bit values + * x = y return 0 + * x < y return -1 + * x > y return 1 + * + * slt t0, x.hi, y.hi; # (x.hi < y.hi) ? 1:0 + * sgt t1, x.hi, y.hi; # (y.hi > x.hi) ? 1:0 + * subu res, t0, t1 # res = -1:1:0 for [ < > = ] + * bnez res, finish + * sltu t0, x.lo, y.lo + * sgtu r1, x.lo, y.lo + * subu res, t0, t1 + * finish: + * + */ +void MipsMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + int t0 = AllocTemp(); + int t1 = AllocTemp(); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + NewLIR3(kMipsSlt, t0, rl_src1.high_reg, rl_src2.high_reg); + NewLIR3(kMipsSlt, t1, rl_src2.high_reg, rl_src1.high_reg); + NewLIR3(kMipsSubu, rl_result.low_reg, t1, t0); + LIR* branch = OpCmpImmBranch(kCondNe, rl_result.low_reg, 0, NULL); + NewLIR3(kMipsSltu, t0, rl_src1.low_reg, rl_src2.low_reg); + NewLIR3(kMipsSltu, t1, rl_src2.low_reg, rl_src1.low_reg); + NewLIR3(kMipsSubu, rl_result.low_reg, t1, t0); + FreeTemp(t0); + FreeTemp(t1); + LIR* target = NewLIR0(kPseudoTargetLabel); + branch->target = target; + StoreValue(rl_dest, rl_result); +} + +LIR* MipsMir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, + LIR* target) +{ + LIR* branch; + MipsOpCode slt_op; + MipsOpCode br_op; + bool cmp_zero = false; + bool swapped = false; + switch (cond) { + case kCondEq: + br_op = kMipsBeq; + cmp_zero = true; + break; + case kCondNe: + br_op = kMipsBne; + cmp_zero = true; + break; + case kCondCc: + slt_op = kMipsSltu; + br_op = kMipsBnez; + break; + case kCondCs: + slt_op = kMipsSltu; + br_op = kMipsBeqz; + break; + case kCondGe: + slt_op = kMipsSlt; + br_op = kMipsBeqz; + break; + case kCondGt: + slt_op = kMipsSlt; + br_op = kMipsBnez; + swapped = true; + break; + case kCondLe: + slt_op = kMipsSlt; + br_op = kMipsBeqz; + swapped = true; + break; + case kCondLt: + slt_op = kMipsSlt; + br_op = kMipsBnez; + break; + case kCondHi: // Gtu + slt_op = kMipsSltu; + br_op = kMipsBnez; + swapped = true; + break; + default: + LOG(FATAL) << "No support for ConditionCode: " << cond; + return NULL; + } + if (cmp_zero) { + branch = NewLIR2(br_op, src1, src2); + } else { + int t_reg = AllocTemp(); + if (swapped) { + NewLIR3(slt_op, t_reg, src2, src1); + } else { + NewLIR3(slt_op, t_reg, src1, src2); + } + branch = NewLIR1(br_op, t_reg); + FreeTemp(t_reg); + } + branch->target = target; + return branch; +} + +LIR* MipsMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, + int check_value, LIR* target) +{ + LIR* branch; + if (check_value != 0) { + // TUNING: handle s16 & kCondLt/Mi case using slti + int t_reg = AllocTemp(); + LoadConstant(t_reg, check_value); + branch = OpCmpBranch(cond, reg, t_reg, target); + FreeTemp(t_reg); + return branch; + } + MipsOpCode opc; + switch (cond) { + case kCondEq: opc = kMipsBeqz; break; + case kCondGe: opc = kMipsBgez; break; + case kCondGt: opc = kMipsBgtz; break; + case kCondLe: opc = kMipsBlez; break; + //case KCondMi: + case kCondLt: opc = kMipsBltz; break; + case kCondNe: opc = kMipsBnez; break; + default: + // Tuning: use slti when applicable + int t_reg = AllocTemp(); + LoadConstant(t_reg, check_value); + branch = OpCmpBranch(cond, reg, t_reg, target); + FreeTemp(t_reg); + return branch; + 
} + branch = NewLIR1(opc, reg); + branch->target = target; + return branch; +} + +LIR* MipsMir2Lir::OpRegCopyNoInsert(int r_dest, int r_src) +{ + if (MIPS_FPREG(r_dest) || MIPS_FPREG(r_src)) + return OpFpRegCopy(r_dest, r_src); + LIR* res = RawLIR(current_dalvik_offset_, kMipsMove, + r_dest, r_src); + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { + res->flags.is_nop = true; + } + return res; +} + +LIR* MipsMir2Lir::OpRegCopy(int r_dest, int r_src) +{ + LIR *res = OpRegCopyNoInsert(r_dest, r_src); + AppendLIR(res); + return res; +} + +void MipsMir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, + int src_hi) +{ + bool dest_fp = MIPS_FPREG(dest_lo) && MIPS_FPREG(dest_hi); + bool src_fp = MIPS_FPREG(src_lo) && MIPS_FPREG(src_hi); + assert(MIPS_FPREG(src_lo) == MIPS_FPREG(src_hi)); + assert(MIPS_FPREG(dest_lo) == MIPS_FPREG(dest_hi)); + if (dest_fp) { + if (src_fp) { + OpRegCopy(S2d(dest_lo, dest_hi), S2d(src_lo, src_hi)); + } else { + /* note the operands are swapped for the mtc1 instr */ + NewLIR2(kMipsMtc1, src_lo, dest_lo); + NewLIR2(kMipsMtc1, src_hi, dest_hi); + } + } else { + if (src_fp) { + NewLIR2(kMipsMfc1, dest_lo, src_lo); + NewLIR2(kMipsMfc1, dest_hi, src_hi); + } else { + // Handle overlap + if (src_hi == dest_lo) { + OpRegCopy(dest_hi, src_hi); + OpRegCopy(dest_lo, src_lo); + } else { + OpRegCopy(dest_lo, src_lo); + OpRegCopy(dest_hi, src_hi); + } + } + } +} + +void MipsMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) +{ + UNIMPLEMENTED(FATAL) << "Need codegen for select"; +} + +void MipsMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) +{ + UNIMPLEMENTED(FATAL) << "Need codegen for fused long cmp branch"; +} + +LIR* MipsMir2Lir::GenRegMemCheck(ConditionCode c_code, + int reg1, int base, int offset, ThrowKind kind) +{ + LOG(FATAL) << "Unexpected use of GenRegMemCheck for Arm"; + return NULL; +} + +RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, int reg1, int reg2, + bool is_div) +{ + NewLIR4(kMipsDiv, r_HI, r_LO, reg1, reg2); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (is_div) { + NewLIR2(kMipsMflo, rl_result.low_reg, r_LO); + } else { + NewLIR2(kMipsMfhi, rl_result.low_reg, r_HI); + } + return rl_result; +} + +RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit, + bool is_div) +{ + int t_reg = AllocTemp(); + NewLIR3(kMipsAddiu, t_reg, r_ZERO, lit); + NewLIR4(kMipsDiv, r_HI, r_LO, reg1, t_reg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (is_div) { + NewLIR2(kMipsMflo, rl_result.low_reg, r_LO); + } else { + NewLIR2(kMipsMfhi, rl_result.low_reg, r_HI); + } + FreeTemp(t_reg); + return rl_result; +} + +void MipsMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) +{ + LOG(FATAL) << "Unexpected use of OpLea for Arm"; +} + +void MipsMir2Lir::OpTlsCmp(int offset, int val) +{ + LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm"; +} + +bool MipsMir2Lir::GenInlinedCas32(CallInfo* info, bool need_write_barrier) { + DCHECK_NE(cu_->instruction_set, kThumb2); + return false; +} + +bool MipsMir2Lir::GenInlinedSqrt(CallInfo* info) { + DCHECK_NE(cu_->instruction_set, kThumb2); + return false; +} + +LIR* MipsMir2Lir::OpPcRelLoad(int reg, LIR* target) { + LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips"; + return NULL; +} + +LIR* MipsMir2Lir::OpVldm(int rBase, int count) +{ + LOG(FATAL) << "Unexpected use of OpVldm for Mips"; + return NULL; +} + +LIR* MipsMir2Lir::OpVstm(int rBase, int count) +{ + LOG(FATAL) << "Unexpected use of OpVstm for Mips"; + return 
NULL; +} + +void MipsMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, + RegLocation rl_result, int lit, + int first_bit, int second_bit) +{ + int t_reg = AllocTemp(); + OpRegRegImm(kOpLsl, t_reg, rl_src.low_reg, second_bit - first_bit); + OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src.low_reg, t_reg); + FreeTemp(t_reg); + if (first_bit != 0) { + OpRegRegImm(kOpLsl, rl_result.low_reg, rl_result.low_reg, first_bit); + } +} + +void MipsMir2Lir::GenDivZeroCheck(int reg_lo, int reg_hi) +{ + int t_reg = AllocTemp(); + OpRegRegReg(kOpOr, t_reg, reg_lo, reg_hi); + GenImmedCheck(kCondEq, t_reg, 0, kThrowDivZero); + FreeTemp(t_reg); +} + +// Test suspend flag, return target of taken suspend branch +LIR* MipsMir2Lir::OpTestSuspend(LIR* target) +{ + OpRegImm(kOpSub, rMIPS_SUSPEND, 1); + return OpCmpImmBranch((target == NULL) ? kCondEq : kCondNe, rMIPS_SUSPEND, 0, target); +} + +// Decrement register and branch on condition +LIR* MipsMir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) +{ + OpRegImm(kOpSub, reg, 1); + return OpCmpImmBranch(c_code, reg, 0, target); +} + +bool MipsMir2Lir::SmallLiteralDivide(Instruction::Code dalvik_opcode, + RegLocation rl_src, RegLocation rl_dest, int lit) +{ + LOG(FATAL) << "Unexpected use of smallLiteralDive in Mips"; + return false; +} + +LIR* MipsMir2Lir::OpIT(ConditionCode cond, const char* guide) +{ + LOG(FATAL) << "Unexpected use of OpIT in Mips"; + return NULL; +} + +void MipsMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenMulLong for Mips"; +} + +void MipsMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + /* + * [v1 v0] = [a1 a0] + [a3 a2]; + * addu v0,a2,a0 + * addu t1,a3,a1 + * sltu v1,v0,a2 + * addu v1,v1,t1 + */ + + OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src2.low_reg, rl_src1.low_reg); + int t_reg = AllocTemp(); + OpRegRegReg(kOpAdd, t_reg, rl_src2.high_reg, rl_src1.high_reg); + NewLIR3(kMipsSltu, rl_result.high_reg, rl_result.low_reg, rl_src2.low_reg); + OpRegRegReg(kOpAdd, rl_result.high_reg, rl_result.high_reg, t_reg); + FreeTemp(t_reg); + StoreValueWide(rl_dest, rl_result); +} + +void MipsMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + /* + * [v1 v0] = [a1 a0] - [a3 a2]; + * sltu t1,a0,a2 + * subu v0,a0,a2 + * subu v1,a1,a3 + * subu v1,v1,t1 + */ + + int t_reg = AllocTemp(); + NewLIR3(kMipsSltu, t_reg, rl_src1.low_reg, rl_src2.low_reg); + OpRegRegReg(kOpSub, rl_result.low_reg, rl_src1.low_reg, rl_src2.low_reg); + OpRegRegReg(kOpSub, rl_result.high_reg, rl_src1.high_reg, rl_src2.high_reg); + OpRegRegReg(kOpSub, rl_result.high_reg, rl_result.high_reg, t_reg); + FreeTemp(t_reg); + StoreValueWide(rl_dest, rl_result); +} + +void MipsMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) +{ + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + /* + * [v1 v0] = -[a1 a0] + * negu v0,a0 + * negu v1,a1 + * sltu t1,r_zero + * subu v1,v1,t1 + */ + + OpRegReg(kOpNeg, rl_result.low_reg, rl_src.low_reg); + OpRegReg(kOpNeg, rl_result.high_reg, rl_src.high_reg); + int t_reg = AllocTemp(); + NewLIR3(kMipsSltu, t_reg, r_ZERO, 
rl_result.low_reg); + OpRegRegReg(kOpSub, rl_result.high_reg, rl_result.high_reg, t_reg); + FreeTemp(t_reg); + StoreValueWide(rl_dest, rl_result); +} + +void MipsMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenAndLong for Mips"; +} + +void MipsMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenOrLong for Mips"; +} + +void MipsMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenXorLong for Mips"; +} + +/* + * Generate array load + */ +void MipsMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_dest, int scale) +{ + RegisterClass reg_class = oat_reg_class_by_size(size); + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset; + RegLocation rl_result; + rl_array = LoadValue(rl_array, kCoreReg); + rl_index = LoadValue(rl_index, kCoreReg); + + if (size == kLong || size == kDouble) { + data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); + } else { + data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); + } + + /* null object? */ + GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); + + int reg_ptr = AllocTemp(); + bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); + int reg_len = INVALID_REG; + if (needs_range_check) { + reg_len = AllocTemp(); + /* Get len */ + LoadWordDisp(rl_array.low_reg, len_offset, reg_len); + } + /* reg_ptr -> array data */ + OpRegRegImm(kOpAdd, reg_ptr, rl_array.low_reg, data_offset); + FreeTemp(rl_array.low_reg); + if ((size == kLong) || (size == kDouble)) { + if (scale) { + int r_new_index = AllocTemp(); + OpRegRegImm(kOpLsl, r_new_index, rl_index.low_reg, scale); + OpRegReg(kOpAdd, reg_ptr, r_new_index); + FreeTemp(r_new_index); + } else { + OpRegReg(kOpAdd, reg_ptr, rl_index.low_reg); + } + FreeTemp(rl_index.low_reg); + rl_result = EvalLoc(rl_dest, reg_class, true); + + if (needs_range_check) { + // TODO: change kCondCS to a more meaningful name, is the sense of + // carry-set/clear flipped? + GenRegRegCheck(kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds); + FreeTemp(reg_len); + } + LoadBaseDispWide(reg_ptr, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG); + + FreeTemp(reg_ptr); + StoreValueWide(rl_dest, rl_result); + } else { + rl_result = EvalLoc(rl_dest, reg_class, true); + + if (needs_range_check) { + // TODO: change kCondCS to a more meaningful name, is the sense of + // carry-set/clear flipped? 
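+      // (Per the OpCmpBranch mapping above, kCondCs here acts as an unsigned
+      // "index >= length" test, which is the case that throws the bounds exception.)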
+ GenRegRegCheck(kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds); + FreeTemp(reg_len); + } + LoadBaseIndexed(reg_ptr, rl_index.low_reg, rl_result.low_reg, scale, size); + + FreeTemp(reg_ptr); + StoreValue(rl_dest, rl_result); + } +} + +/* + * Generate array store + * + */ +void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale) +{ + RegisterClass reg_class = oat_reg_class_by_size(size); + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset; + + if (size == kLong || size == kDouble) { + data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); + } else { + data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); + } + + rl_array = LoadValue(rl_array, kCoreReg); + rl_index = LoadValue(rl_index, kCoreReg); + int reg_ptr = INVALID_REG; + if (IsTemp(rl_array.low_reg)) { + Clobber(rl_array.low_reg); + reg_ptr = rl_array.low_reg; + } else { + reg_ptr = AllocTemp(); + OpRegCopy(reg_ptr, rl_array.low_reg); + } + + /* null object? */ + GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); + + bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); + int reg_len = INVALID_REG; + if (needs_range_check) { + reg_len = AllocTemp(); + //NOTE: max live temps(4) here. + /* Get len */ + LoadWordDisp(rl_array.low_reg, len_offset, reg_len); + } + /* reg_ptr -> array data */ + OpRegImm(kOpAdd, reg_ptr, data_offset); + /* at this point, reg_ptr points to array, 2 live temps */ + if ((size == kLong) || (size == kDouble)) { + //TUNING: specific wide routine that can handle fp regs + if (scale) { + int r_new_index = AllocTemp(); + OpRegRegImm(kOpLsl, r_new_index, rl_index.low_reg, scale); + OpRegReg(kOpAdd, reg_ptr, r_new_index); + FreeTemp(r_new_index); + } else { + OpRegReg(kOpAdd, reg_ptr, rl_index.low_reg); + } + rl_src = LoadValueWide(rl_src, reg_class); + + if (needs_range_check) { + GenRegRegCheck(kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds); + FreeTemp(reg_len); + } + + StoreBaseDispWide(reg_ptr, 0, rl_src.low_reg, rl_src.high_reg); + + FreeTemp(reg_ptr); + } else { + rl_src = LoadValue(rl_src, reg_class); + if (needs_range_check) { + GenRegRegCheck(kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds); + FreeTemp(reg_len); + } + StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg, + scale, size); + } +} + +/* + * Generate array store + * + */ +void MipsMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale) +{ + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(); + + FlushAllRegs(); // Use explicit registers + LockCallTemps(); + + int r_value = TargetReg(kArg0); // Register holding value + int r_array_class = TargetReg(kArg1); // Register holding array's Class + int r_array = TargetReg(kArg2); // Register holding array + int r_index = TargetReg(kArg3); // Register holding index into array + + LoadValueDirectFixed(rl_array, r_array); // Grab array + LoadValueDirectFixed(rl_src, r_value); // Grab value + LoadValueDirectFixed(rl_index, r_index); // Grab index + + GenNullCheck(rl_array.s_reg_low, r_array, opt_flags); // NPE? + + // Store of null? + LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL); + + // Get the array's class. 
+ LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class); + CallRuntimeHelperRegReg(ENTRYPOINT_OFFSET(pCanPutArrayElementFromCode), r_value, + r_array_class, true); + // Redo LoadValues in case they didn't survive the call. + LoadValueDirectFixed(rl_array, r_array); // Reload array + LoadValueDirectFixed(rl_index, r_index); // Reload index + LoadValueDirectFixed(rl_src, r_value); // Reload value + r_array_class = INVALID_REG; + + // Branch here if value to be stored == null + LIR* target = NewLIR0(kPseudoTargetLabel); + null_value_check->target = target; + + bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); + int reg_len = INVALID_REG; + if (needs_range_check) { + reg_len = TargetReg(kArg1); + LoadWordDisp(r_array, len_offset, reg_len); // Get len + } + /* r_ptr -> array data */ + int r_ptr = AllocTemp(); + OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset); + if (needs_range_check) { + GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds); + } + StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord); + FreeTemp(r_ptr); + FreeTemp(r_index); + if (!mir_graph_->IsConstantNullRef(rl_src)) { + MarkGCCard(r_value, r_array); + } +} + +void MipsMir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift) +{ + // Default implementation is just to ignore the constant case. + GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift); +} + +void MipsMir2Lir::GenArithImmOpLong(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) +{ + // Default - bail to non-const handler. + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); +} + +} // namespace art diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h new file mode 100644 index 0000000000..ceab9ab1e5 --- /dev/null +++ b/compiler/dex/quick/mips/mips_lir.h @@ -0,0 +1,432 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_SRC_COMPILER_DEX_QUICK_MIPS_MIPSLIR_H_ +#define ART_SRC_COMPILER_DEX_QUICK_MIPS_MIPSLIR_H_ + +#include "dex/compiler_internals.h" + +namespace art { + +/* + * Runtime register conventions. 
+ * + * zero is always the value 0 + * at is scratch (normally used as temp reg by assembler) + * v0, v1 are scratch (normally hold subroutine return values) + * a0-a3 are scratch (normally hold subroutine arguments) + * t0-t8 are scratch + * t9 is scratch (normally used for function calls) + * s0 (rMIPS_SUSPEND) is reserved [holds suspend-check counter] + * s1 (rMIPS_SELF) is reserved [holds current &Thread] + * s2-s7 are callee save (promotion target) + * k0, k1 are reserved for use by interrupt handlers + * gp is reserved for global pointer + * sp is reserved + * s8 is callee save (promotion target) + * ra is scratch (normally holds the return addr) + * + * Preserved across C calls: s0-s8 + * Trashed across C calls: at, v0-v1, a0-a3, t0-t9, gp, ra + * + * Floating pointer registers + * NOTE: there are 32 fp registers (16 df pairs), but currently + * only support 16 fp registers (8 df pairs). + * f0-f15 + * df0-df7, where df0={f0,f1}, df1={f2,f3}, ... , df7={f14,f15} + * + * f0-f15 (df0-df7) trashed across C calls + * + * For mips32 code use: + * a0-a3 to hold operands + * v0-v1 to hold results + * t0-t9 for temps + * + * All jump/branch instructions have a delay slot after it. + * + * Stack frame diagram (stack grows down, higher addresses at top): + * + * +------------------------+ + * | IN[ins-1] | {Note: resides in caller's frame} + * | . | + * | IN[0] | + * | caller's Method* | + * +========================+ {Note: start of callee's frame} + * | spill region | {variable sized - will include lr if non-leaf.} + * +------------------------+ + * | ...filler word... | {Note: used as 2nd word of V[locals-1] if long] + * +------------------------+ + * | V[locals-1] | + * | V[locals-2] | + * | . | + * | . | + * | V[1] | + * | V[0] | + * +------------------------+ + * | 0 to 3 words padding | + * +------------------------+ + * | OUT[outs-1] | + * | OUT[outs-2] | + * | . | + * | OUT[0] | + * | cur_method* | <<== sp w/ 16-byte alignment + * +========================+ + */ + +// Offset to distingish FP regs. +#define MIPS_FP_REG_OFFSET 32 +// Offset to distinguish DP FP regs. +#define MIPS_FP_DOUBLE 64 +// Offset to distingish the extra regs. +#define MIPS_EXTRA_REG_OFFSET 128 +// Reg types. +#define MIPS_REGTYPE(x) (x & (MIPS_FP_REG_OFFSET | MIPS_FP_DOUBLE)) +#define MIPS_FPREG(x) ((x & MIPS_FP_REG_OFFSET) == MIPS_FP_REG_OFFSET) +#define MIPS_EXTRAREG(x) ((x & MIPS_EXTRA_REG_OFFSET) == MIPS_EXTRA_REG_OFFSET) +#define MIPS_DOUBLEREG(x) ((x & MIPS_FP_DOUBLE) == MIPS_FP_DOUBLE) +#define MIPS_SINGLEREG(x) (MIPS_FPREG(x) && !MIPS_DOUBLEREG(x)) +/* + * Note: the low register of a floating point pair is sufficient to + * create the name of a double, but require both names to be passed to + * allow for asserts to verify that the pair is consecutive if significant + * rework is done in this area. Also, it is a good reminder in the calling + * code that reg locations always describe doubles as a pair of singles. + */ +#define MIPS_S2D(x,y) ((x) | MIPS_FP_DOUBLE) +// Mask to strip off fp flags. +#define MIPS_FP_REG_MASK (MIPS_FP_REG_OFFSET-1) + +#ifdef HAVE_LITTLE_ENDIAN +#define LOWORD_OFFSET 0 +#define HIWORD_OFFSET 4 +#define r_ARG0 r_A0 +#define r_ARG1 r_A1 +#define r_ARG2 r_A2 +#define r_ARG3 r_A3 +#define r_RESULT0 r_V0 +#define r_RESULT1 r_V1 +#else +#define LOWORD_OFFSET 4 +#define HIWORD_OFFSET 0 +#define r_ARG0 r_A1 +#define r_ARG1 r_A0 +#define r_ARG2 r_A3 +#define r_ARG3 r_A2 +#define r_RESULT0 r_V1 +#define r_RESULT1 r_V0 +#endif + +// These are the same for both big and little endian. 
+#define r_FARG0 r_F12 +#define r_FARG1 r_F13 +#define r_FARG2 r_F14 +#define r_FARG3 r_F15 +#define r_FRESULT0 r_F0 +#define r_FRESULT1 r_F1 + +// Regs not used for Mips. +#define rMIPS_LR INVALID_REG +#define rMIPS_PC INVALID_REG + +// RegisterLocation templates return values (r_V0, or r_V0/r_V1). +#define MIPS_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r_V0, INVALID_REG, \ + INVALID_SREG, INVALID_SREG} +#define MIPS_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r_FRESULT0, \ + INVALID_REG, INVALID_SREG, INVALID_SREG} +#define MIPS_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r_RESULT0, \ + r_RESULT1, INVALID_SREG, INVALID_SREG} +#define MIPS_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r_FRESULT0,\ + r_FRESULT1, INVALID_SREG, INVALID_SREG} + +enum MipsResourceEncodingPos { + kMipsGPReg0 = 0, + kMipsRegSP = 29, + kMipsRegLR = 31, + kMipsFPReg0 = 32, // only 16 fp regs supported currently. + kMipsFPRegEnd = 48, + kMipsRegHI = kMipsFPRegEnd, + kMipsRegLO, + kMipsRegPC, + kMipsRegEnd = 51, +}; + +#define ENCODE_MIPS_REG_LIST(N) (static_cast<uint64_t>(N)) +#define ENCODE_MIPS_REG_SP (1ULL << kMipsRegSP) +#define ENCODE_MIPS_REG_LR (1ULL << kMipsRegLR) +#define ENCODE_MIPS_REG_PC (1ULL << kMipsRegPC) + +enum MipsNativeRegisterPool { + r_ZERO = 0, + r_AT = 1, + r_V0 = 2, + r_V1 = 3, + r_A0 = 4, + r_A1 = 5, + r_A2 = 6, + r_A3 = 7, + r_T0 = 8, + r_T1 = 9, + r_T2 = 10, + r_T3 = 11, + r_T4 = 12, + r_T5 = 13, + r_T6 = 14, + r_T7 = 15, + r_S0 = 16, + r_S1 = 17, + r_S2 = 18, + r_S3 = 19, + r_S4 = 20, + r_S5 = 21, + r_S6 = 22, + r_S7 = 23, + r_T8 = 24, + r_T9 = 25, + r_K0 = 26, + r_K1 = 27, + r_GP = 28, + r_SP = 29, + r_FP = 30, + r_RA = 31, + + r_F0 = 0 + MIPS_FP_REG_OFFSET, + r_F1, + r_F2, + r_F3, + r_F4, + r_F5, + r_F6, + r_F7, + r_F8, + r_F9, + r_F10, + r_F11, + r_F12, + r_F13, + r_F14, + r_F15, +#if 0 + /* + * TODO: The shared resource mask doesn't have enough bit positions to describe all + * MIPS registers. Expand it and enable use of fp registers 16 through 31. + */ + r_F16, + r_F17, + r_F18, + r_F19, + r_F20, + r_F21, + r_F22, + r_F23, + r_F24, + r_F25, + r_F26, + r_F27, + r_F28, + r_F29, + r_F30, + r_F31, +#endif + r_DF0 = r_F0 + MIPS_FP_DOUBLE, + r_DF1 = r_F2 + MIPS_FP_DOUBLE, + r_DF2 = r_F4 + MIPS_FP_DOUBLE, + r_DF3 = r_F6 + MIPS_FP_DOUBLE, + r_DF4 = r_F8 + MIPS_FP_DOUBLE, + r_DF5 = r_F10 + MIPS_FP_DOUBLE, + r_DF6 = r_F12 + MIPS_FP_DOUBLE, + r_DF7 = r_F14 + MIPS_FP_DOUBLE, +#if 0 // TODO: expand resource mask to enable use of all MIPS fp registers. + r_DF8 = r_F16 + MIPS_FP_DOUBLE, + r_DF9 = r_F18 + MIPS_FP_DOUBLE, + r_DF10 = r_F20 + MIPS_FP_DOUBLE, + r_DF11 = r_F22 + MIPS_FP_DOUBLE, + r_DF12 = r_F24 + MIPS_FP_DOUBLE, + r_DF13 = r_F26 + MIPS_FP_DOUBLE, + r_DF14 = r_F28 + MIPS_FP_DOUBLE, + r_DF15 = r_F30 + MIPS_FP_DOUBLE, +#endif + r_HI = MIPS_EXTRA_REG_OFFSET, + r_LO, + r_PC, +}; + +#define rMIPS_SUSPEND r_S0 +#define rMIPS_SELF r_S1 +#define rMIPS_SP r_SP +#define rMIPS_ARG0 r_ARG0 +#define rMIPS_ARG1 r_ARG1 +#define rMIPS_ARG2 r_ARG2 +#define rMIPS_ARG3 r_ARG3 +#define rMIPS_FARG0 r_FARG0 +#define rMIPS_FARG1 r_FARG1 +#define rMIPS_FARG2 r_FARG2 +#define rMIPS_FARG3 r_FARG3 +#define rMIPS_RET0 r_RESULT0 +#define rMIPS_RET1 r_RESULT1 +#define rMIPS_INVOKE_TGT r_T9 +#define rMIPS_COUNT INVALID_REG + +enum MipsShiftEncodings { + kMipsLsl = 0x0, + kMipsLsr = 0x1, + kMipsAsr = 0x2, + kMipsRor = 0x3 +}; + +// MIPS sync kinds (Note: support for kinds other than kSYNC0 may not exist). 
+#define kSYNC0 0x00 +#define kSYNC_WMB 0x04 +#define kSYNC_MB 0x01 +#define kSYNC_ACQUIRE 0x11 +#define kSYNC_RELEASE 0x12 +#define kSYNC_RMB 0x13 + +// TODO: Use smaller hammer when appropriate for target CPU. +#define kST kSYNC0 +#define kSY kSYNC0 + +/* + * The following enum defines the list of supported Thumb instructions by the + * assembler. Their corresponding EncodingMap positions will be defined in + * Assemble.cc. + */ +enum MipsOpCode { + kMipsFirst = 0, + kMips32BitData = kMipsFirst, // data [31..0]. + kMipsAddiu, // addiu t,s,imm16 [001001] s[25..21] t[20..16] imm16[15..0]. + kMipsAddu, // add d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100001]. + kMipsAnd, // and d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100100]. + kMipsAndi, // andi t,s,imm16 [001100] s[25..21] t[20..16] imm16[15..0]. + kMipsB, // b o [0001000000000000] o[15..0]. + kMipsBal, // bal o [0000010000010001] o[15..0]. + // NOTE: the code tests the range kMipsBeq thru kMipsBne, so adding an instruction in this + // range may require updates. + kMipsBeq, // beq s,t,o [000100] s[25..21] t[20..16] o[15..0]. + kMipsBeqz, // beqz s,o [000100] s[25..21] [00000] o[15..0]. + kMipsBgez, // bgez s,o [000001] s[25..21] [00001] o[15..0]. + kMipsBgtz, // bgtz s,o [000111] s[25..21] [00000] o[15..0]. + kMipsBlez, // blez s,o [000110] s[25..21] [00000] o[15..0]. + kMipsBltz, // bltz s,o [000001] s[25..21] [00000] o[15..0]. + kMipsBnez, // bnez s,o [000101] s[25..21] [00000] o[15..0]. + kMipsBne, // bne s,t,o [000101] s[25..21] t[20..16] o[15..0]. + kMipsDiv, // div s,t [000000] s[25..21] t[20..16] [0000000000011010]. +#if __mips_isa_rev>=2 + kMipsExt, // ext t,s,p,z [011111] s[25..21] t[20..16] z[15..11] p[10..6] [000000]. +#endif + kMipsJal, // jal t [000011] t[25..0]. + kMipsJalr, // jalr d,s [000000] s[25..21] [00000] d[15..11] hint[10..6] [001001]. + kMipsJr, // jr s [000000] s[25..21] [0000000000] hint[10..6] [001000]. + kMipsLahi, // lui t,imm16 [00111100000] t[20..16] imm16[15..0] load addr hi. + kMipsLalo, // ori t,s,imm16 [001001] s[25..21] t[20..16] imm16[15..0] load addr lo. + kMipsLui, // lui t,imm16 [00111100000] t[20..16] imm16[15..0]. + kMipsLb, // lb t,o(b) [100000] b[25..21] t[20..16] o[15..0]. + kMipsLbu, // lbu t,o(b) [100100] b[25..21] t[20..16] o[15..0]. + kMipsLh, // lh t,o(b) [100001] b[25..21] t[20..16] o[15..0]. + kMipsLhu, // lhu t,o(b) [100101] b[25..21] t[20..16] o[15..0]. + kMipsLw, // lw t,o(b) [100011] b[25..21] t[20..16] o[15..0]. + kMipsMfhi, // mfhi d [0000000000000000] d[15..11] [00000010000]. + kMipsMflo, // mflo d [0000000000000000] d[15..11] [00000010010]. + kMipsMove, // move d,s [000000] s[25..21] [00000] d[15..11] [00000100101]. + kMipsMovz, // movz d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000001010]. + kMipsMul, // mul d,s,t [011100] s[25..21] t[20..16] d[15..11] [00000000010]. + kMipsNop, // nop [00000000000000000000000000000000]. + kMipsNor, // nor d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100111]. + kMipsOr, // or d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100101]. + kMipsOri, // ori t,s,imm16 [001001] s[25..21] t[20..16] imm16[15..0]. + kMipsPref, // pref h,o(b) [101011] b[25..21] h[20..16] o[15..0]. + kMipsSb, // sb t,o(b) [101000] b[25..21] t[20..16] o[15..0]. +#if __mips_isa_rev>=2 + kMipsSeb, // seb d,t [01111100000] t[20..16] d[15..11] [10000100000]. + kMipsSeh, // seh d,t [01111100000] t[20..16] d[15..11] [11000100000]. +#endif + kMipsSh, // sh t,o(b) [101001] b[25..21] t[20..16] o[15..0]. 
+ kMipsSll, // sll d,t,a [00000000000] t[20..16] d[15..11] a[10..6] [000000]. + kMipsSllv, // sllv d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000000100]. + kMipsSlt, // slt d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000101010]. + kMipsSlti, // slti t,s,imm16 [001010] s[25..21] t[20..16] imm16[15..0]. + kMipsSltu, // sltu d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000101011]. + kMipsSra, // sra d,s,imm5 [00000000000] t[20..16] d[15..11] imm5[10..6] [000011]. + kMipsSrav, // srav d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000000111]. + kMipsSrl, // srl d,t,a [00000000000] t[20..16] d[20..16] a[10..6] [000010]. + kMipsSrlv, // srlv d,t,s [000000] s[25..21] t[20..16] d[15..11] [00000000110]. + kMipsSubu, // subu d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100011]. + kMipsSw, // sw t,o(b) [101011] b[25..21] t[20..16] o[15..0]. + kMipsXor, // xor d,s,t [000000] s[25..21] t[20..16] d[15..11] [00000100110]. + kMipsXori, // xori t,s,imm16 [001110] s[25..21] t[20..16] imm16[15..0]. + kMipsFadds, // add.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000000]. + kMipsFsubs, // sub.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000001]. + kMipsFmuls, // mul.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000010]. + kMipsFdivs, // div.s d,s,t [01000110000] t[20..16] s[15..11] d[10..6] [000011]. + kMipsFaddd, // add.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000000]. + kMipsFsubd, // sub.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000001]. + kMipsFmuld, // mul.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000010]. + kMipsFdivd, // div.d d,s,t [01000110001] t[20..16] s[15..11] d[10..6] [000011]. + kMipsFcvtsd,// cvt.s.d d,s [01000110001] [00000] s[15..11] d[10..6] [100000]. + kMipsFcvtsw,// cvt.s.w d,s [01000110100] [00000] s[15..11] d[10..6] [100000]. + kMipsFcvtds,// cvt.d.s d,s [01000110000] [00000] s[15..11] d[10..6] [100001]. + kMipsFcvtdw,// cvt.d.w d,s [01000110100] [00000] s[15..11] d[10..6] [100001]. + kMipsFcvtws,// cvt.w.d d,s [01000110000] [00000] s[15..11] d[10..6] [100100]. + kMipsFcvtwd,// cvt.w.d d,s [01000110001] [00000] s[15..11] d[10..6] [100100]. + kMipsFmovs, // mov.s d,s [01000110000] [00000] s[15..11] d[10..6] [000110]. + kMipsFmovd, // mov.d d,s [01000110001] [00000] s[15..11] d[10..6] [000110]. + kMipsFlwc1, // lwc1 t,o(b) [110001] b[25..21] t[20..16] o[15..0]. + kMipsFldc1, // ldc1 t,o(b) [110101] b[25..21] t[20..16] o[15..0]. + kMipsFswc1, // swc1 t,o(b) [111001] b[25..21] t[20..16] o[15..0]. + kMipsFsdc1, // sdc1 t,o(b) [111101] b[25..21] t[20..16] o[15..0]. + kMipsMfc1, // mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000]. + kMipsMtc1, // mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000]. + kMipsDelta, // Psuedo for ori t, s, <label>-<label>. + kMipsDeltaHi, // Pseudo for lui t, high16(<label>-<label>). + kMipsDeltaLo, // Pseudo for ori t, s, low16(<label>-<label>). + kMipsCurrPC, // jal to .+8 to materialize pc. + kMipsSync, // sync kind [000000] [0000000000000000] s[10..6] [001111]. + kMipsUndefined, // undefined [011001xxxxxxxxxxxxxxxx]. + kMipsLast +}; + +// Instruction assembly field_loc kind. +enum MipsEncodingKind { + kFmtUnused, + kFmtBitBlt, /* Bit string using end/start */ + kFmtDfp, /* Double FP reg */ + kFmtSfp, /* Single FP reg */ + kFmtBlt5_2, /* Same 5-bit field to 2 locations */ +}; + +// Struct used to define the snippet positions for each MIPS opcode. 
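+// Rough sketch of how the map is presumably consumed by the assembler: it
+// starts from 'skeleton' and splices lir->operands[i] into the bit range named
+// by field_loc[i] (for kFmtBitBlt, roughly operand << start, masked to bits
+// start..end).  For kMipsAddiu ("addiu t,s,imm16") the operands land in
+// t[20..16], s[25..21] and imm16[15..0] as documented in the opcode comments
+// above; the exact operand ordering is given by the EncodingMap entries.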
+struct MipsEncodingMap { + uint32_t skeleton; + struct { + MipsEncodingKind kind; + int end; // end for kFmtBitBlt, 1-bit slice end for FP regs. + int start; // start for kFmtBitBlt, 4-bit slice end for FP regs. + } field_loc[4]; + MipsOpCode opcode; + uint64_t flags; + const char *name; + const char* fmt; + int size; // Note: size is in bytes. +}; + +extern MipsEncodingMap EncodingMap[kMipsLast]; + +#define IS_UIMM16(v) ((0 <= (v)) && ((v) <= 65535)) +#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32766)) +#define IS_SIMM16_2WORD(v) ((-32764 <= (v)) && ((v) <= 32763)) // 2 offsets must fit. + +} // namespace art + +#endif // ART_SRC_COMPILER_DEX_QUICK_MIPS_MIPSLIR_H_ diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc new file mode 100644 index 0000000000..cab2c1b53d --- /dev/null +++ b/compiler/dex/quick/mips/target_mips.cc @@ -0,0 +1,610 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "codegen_mips.h" +#include "dex/compiler_internals.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "mips_lir.h" + +#include <string> + +namespace art { + +static int core_regs[] = {r_ZERO, r_AT, r_V0, r_V1, r_A0, r_A1, r_A2, r_A3, + r_T0, r_T1, r_T2, r_T3, r_T4, r_T5, r_T6, r_T7, + r_S0, r_S1, r_S2, r_S3, r_S4, r_S5, r_S6, r_S7, r_T8, + r_T9, r_K0, r_K1, r_GP, r_SP, r_FP, r_RA}; +static int ReservedRegs[] = {r_ZERO, r_AT, r_S0, r_S1, r_K0, r_K1, r_GP, r_SP, + r_RA}; +static int core_temps[] = {r_V0, r_V1, r_A0, r_A1, r_A2, r_A3, r_T0, r_T1, r_T2, + r_T3, r_T4, r_T5, r_T6, r_T7, r_T8}; +static int FpRegs[] = {r_F0, r_F1, r_F2, r_F3, r_F4, r_F5, r_F6, r_F7, + r_F8, r_F9, r_F10, r_F11, r_F12, r_F13, r_F14, r_F15}; +static int fp_temps[] = {r_F0, r_F1, r_F2, r_F3, r_F4, r_F5, r_F6, r_F7, + r_F8, r_F9, r_F10, r_F11, r_F12, r_F13, r_F14, r_F15}; + +RegLocation MipsMir2Lir::LocCReturn() +{ + RegLocation res = MIPS_LOC_C_RETURN; + return res; +} + +RegLocation MipsMir2Lir::LocCReturnWide() +{ + RegLocation res = MIPS_LOC_C_RETURN_WIDE; + return res; +} + +RegLocation MipsMir2Lir::LocCReturnFloat() +{ + RegLocation res = MIPS_LOC_C_RETURN_FLOAT; + return res; +} + +RegLocation MipsMir2Lir::LocCReturnDouble() +{ + RegLocation res = MIPS_LOC_C_RETURN_DOUBLE; + return res; +} + +// Return a target-dependent special register. 
+int MipsMir2Lir::TargetReg(SpecialTargetRegister reg) { + int res = INVALID_REG; + switch (reg) { + case kSelf: res = rMIPS_SELF; break; + case kSuspend: res = rMIPS_SUSPEND; break; + case kLr: res = rMIPS_LR; break; + case kPc: res = rMIPS_PC; break; + case kSp: res = rMIPS_SP; break; + case kArg0: res = rMIPS_ARG0; break; + case kArg1: res = rMIPS_ARG1; break; + case kArg2: res = rMIPS_ARG2; break; + case kArg3: res = rMIPS_ARG3; break; + case kFArg0: res = rMIPS_FARG0; break; + case kFArg1: res = rMIPS_FARG1; break; + case kFArg2: res = rMIPS_FARG2; break; + case kFArg3: res = rMIPS_FARG3; break; + case kRet0: res = rMIPS_RET0; break; + case kRet1: res = rMIPS_RET1; break; + case kInvokeTgt: res = rMIPS_INVOKE_TGT; break; + case kCount: res = rMIPS_COUNT; break; + } + return res; +} + +// Create a double from a pair of singles. +int MipsMir2Lir::S2d(int low_reg, int high_reg) +{ + return MIPS_S2D(low_reg, high_reg); +} + +// Return mask to strip off fp reg flags and bias. +uint32_t MipsMir2Lir::FpRegMask() +{ + return MIPS_FP_REG_MASK; +} + +// True if both regs single, both core or both double. +bool MipsMir2Lir::SameRegType(int reg1, int reg2) +{ + return (MIPS_REGTYPE(reg1) == MIPS_REGTYPE(reg2)); +} + +/* + * Decode the register id. + */ +uint64_t MipsMir2Lir::GetRegMaskCommon(int reg) +{ + uint64_t seed; + int shift; + int reg_id; + + + reg_id = reg & 0x1f; + /* Each double register is equal to a pair of single-precision FP registers */ + seed = MIPS_DOUBLEREG(reg) ? 3 : 1; + /* FP register starts at bit position 16 */ + shift = MIPS_FPREG(reg) ? kMipsFPReg0 : 0; + /* Expand the double register id into single offset */ + shift += reg_id; + return (seed << shift); +} + +uint64_t MipsMir2Lir::GetPCUseDefEncoding() +{ + return ENCODE_MIPS_REG_PC; +} + + +void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir) +{ + DCHECK_EQ(cu_->instruction_set, kMips); + + // Mips-specific resource map setup here. + uint64_t flags = MipsMir2Lir::EncodingMap[lir->opcode].flags; + + if (flags & REG_DEF_SP) { + lir->def_mask |= ENCODE_MIPS_REG_SP; + } + + if (flags & REG_USE_SP) { + lir->use_mask |= ENCODE_MIPS_REG_SP; + } + + if (flags & REG_DEF_LR) { + lir->def_mask |= ENCODE_MIPS_REG_LR; + } +} + +/* For dumping instructions */ +#define MIPS_REG_COUNT 32 +static const char *mips_reg_name[MIPS_REG_COUNT] = { + "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" +}; + +/* + * Interpret a format string and build a string no longer than size + * See format key in Assemble.c. 
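+ * A '!' escape names an operand and a conversion: e.g. "!0r" prints
+ * operands[0] as a core register name and "!1d" prints operands[1] in
+ * decimal (see the switch below for the full set of conversion characters).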
+ */ +std::string MipsMir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) +{ + std::string buf; + int i; + const char *fmt_end = &fmt[strlen(fmt)]; + char tbuf[256]; + char nc; + while (fmt < fmt_end) { + int operand; + if (*fmt == '!') { + fmt++; + DCHECK_LT(fmt, fmt_end); + nc = *fmt++; + if (nc=='!') { + strcpy(tbuf, "!"); + } else { + DCHECK_LT(fmt, fmt_end); + DCHECK_LT(static_cast<unsigned>(nc-'0'), 4u); + operand = lir->operands[nc-'0']; + switch (*fmt++) { + case 'b': + strcpy(tbuf,"0000"); + for (i=3; i>= 0; i--) { + tbuf[i] += operand & 1; + operand >>= 1; + } + break; + case 's': + sprintf(tbuf,"$f%d",operand & MIPS_FP_REG_MASK); + break; + case 'S': + DCHECK_EQ(((operand & MIPS_FP_REG_MASK) & 1), 0); + sprintf(tbuf,"$f%d",operand & MIPS_FP_REG_MASK); + break; + case 'h': + sprintf(tbuf,"%04x", operand); + break; + case 'M': + case 'd': + sprintf(tbuf,"%d", operand); + break; + case 'D': + sprintf(tbuf,"%d", operand+1); + break; + case 'E': + sprintf(tbuf,"%d", operand*4); + break; + case 'F': + sprintf(tbuf,"%d", operand*2); + break; + case 't': + sprintf(tbuf,"0x%08x (L%p)", reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + + (operand << 2), lir->target); + break; + case 'T': + sprintf(tbuf,"0x%08x", operand << 2); + break; + case 'u': { + int offset_1 = lir->operands[0]; + int offset_2 = NEXT_LIR(lir)->operands[0]; + uintptr_t target = + (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & ~3) + + (offset_1 << 21 >> 9) + (offset_2 << 1)) & 0xfffffffc; + sprintf(tbuf, "%p", reinterpret_cast<void*>(target)); + break; + } + + /* Nothing to print for BLX_2 */ + case 'v': + strcpy(tbuf, "see above"); + break; + case 'r': + DCHECK(operand >= 0 && operand < MIPS_REG_COUNT); + strcpy(tbuf, mips_reg_name[operand]); + break; + case 'N': + // Placeholder for delay slot handling + strcpy(tbuf, "; nop"); + break; + default: + strcpy(tbuf,"DecodeError"); + break; + } + buf += tbuf; + } + } else { + buf += *fmt++; + } + } + return buf; +} + +// FIXME: need to redo resource maps for MIPS - fix this at that time +void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, uint64_t mask, const char *prefix) +{ + char buf[256]; + buf[0] = 0; + + if (mask == ENCODE_ALL) { + strcpy(buf, "all"); + } else { + char num[8]; + int i; + + for (i = 0; i < kMipsRegEnd; i++) { + if (mask & (1ULL << i)) { + sprintf(num, "%d ", i); + strcat(buf, num); + } + } + + if (mask & ENCODE_CCODE) { + strcat(buf, "cc "); + } + if (mask & ENCODE_FP_STATUS) { + strcat(buf, "fpcc "); + } + /* Memory bits */ + if (mips_lir && (mask & ENCODE_DALVIK_REG)) { + sprintf(buf + strlen(buf), "dr%d%s", mips_lir->alias_info & 0xffff, + (mips_lir->alias_info & 0x80000000) ? "(+1)" : ""); + } + if (mask & ENCODE_LITERAL) { + strcat(buf, "lit "); + } + + if (mask & ENCODE_HEAP_REF) { + strcat(buf, "heap "); + } + if (mask & ENCODE_MUST_NOT_ALIAS) { + strcat(buf, "noalias "); + } + } + if (buf[0]) { + LOG(INFO) << prefix << ": " << buf; + } +} + +/* + * TUNING: is true leaf? Can't just use METHOD_IS_LEAF to determine as some + * instructions might call out to C/assembly helper functions. Until + * machinery is in place, always spill lr. + */ + +void MipsMir2Lir::AdjustSpillMask() +{ + core_spill_mask_ |= (1 << r_RA); + num_core_spills_++; +} + +/* + * Mark a callee-save fp register as promoted. Note that + * vpush/vpop uses contiguous register lists so we must + * include any holes in the mask. Associate holes with + * Dalvik register INVALID_VREG (0xFFFFU). 
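+ * (vpush/vpop are ARM-style contiguous-list spills; on MIPS this path is
+ * currently unreachable, since the implementation below aborts because
+ * promoted FP registers are not yet supported.)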
+ */ +void MipsMir2Lir::MarkPreservedSingle(int s_reg, int reg) +{ + LOG(FATAL) << "No support yet for promoted FP regs"; +} + +void MipsMir2Lir::FlushRegWide(int reg1, int reg2) +{ + RegisterInfo* info1 = GetRegInfo(reg1); + RegisterInfo* info2 = GetRegInfo(reg2); + DCHECK(info1 && info2 && info1->pair && info2->pair && + (info1->partner == info2->reg) && + (info2->partner == info1->reg)); + if ((info1->live && info1->dirty) || (info2->live && info2->dirty)) { + if (!(info1->is_temp && info2->is_temp)) { + /* Should not happen. If it does, there's a problem in eval_loc */ + LOG(FATAL) << "Long half-temp, half-promoted"; + } + + info1->dirty = false; + info2->dirty = false; + if (mir_graph_->SRegToVReg(info2->s_reg) < mir_graph_->SRegToVReg(info1->s_reg)) + info1 = info2; + int v_reg = mir_graph_->SRegToVReg(info1->s_reg); + StoreBaseDispWide(rMIPS_SP, VRegOffset(v_reg), info1->reg, info1->partner); + } +} + +void MipsMir2Lir::FlushReg(int reg) +{ + RegisterInfo* info = GetRegInfo(reg); + if (info->live && info->dirty) { + info->dirty = false; + int v_reg = mir_graph_->SRegToVReg(info->s_reg); + StoreBaseDisp(rMIPS_SP, VRegOffset(v_reg), reg, kWord); + } +} + +/* Give access to the target-dependent FP register encoding to common code */ +bool MipsMir2Lir::IsFpReg(int reg) { + return MIPS_FPREG(reg); +} + +/* Clobber all regs that might be used by an external C call */ +void MipsMir2Lir::ClobberCalleeSave() +{ + Clobber(r_ZERO); + Clobber(r_AT); + Clobber(r_V0); + Clobber(r_V1); + Clobber(r_A0); + Clobber(r_A1); + Clobber(r_A2); + Clobber(r_A3); + Clobber(r_T0); + Clobber(r_T1); + Clobber(r_T2); + Clobber(r_T3); + Clobber(r_T4); + Clobber(r_T5); + Clobber(r_T6); + Clobber(r_T7); + Clobber(r_T8); + Clobber(r_T9); + Clobber(r_K0); + Clobber(r_K1); + Clobber(r_GP); + Clobber(r_FP); + Clobber(r_RA); + Clobber(r_F0); + Clobber(r_F1); + Clobber(r_F2); + Clobber(r_F3); + Clobber(r_F4); + Clobber(r_F5); + Clobber(r_F6); + Clobber(r_F7); + Clobber(r_F8); + Clobber(r_F9); + Clobber(r_F10); + Clobber(r_F11); + Clobber(r_F12); + Clobber(r_F13); + Clobber(r_F14); + Clobber(r_F15); +} + +RegLocation MipsMir2Lir::GetReturnWideAlt() +{ + UNIMPLEMENTED(FATAL) << "No GetReturnWideAlt for MIPS"; + RegLocation res = LocCReturnWide(); + return res; +} + +RegLocation MipsMir2Lir::GetReturnAlt() +{ + UNIMPLEMENTED(FATAL) << "No GetReturnAlt for MIPS"; + RegLocation res = LocCReturn(); + return res; +} + +MipsMir2Lir::RegisterInfo* MipsMir2Lir::GetRegInfo(int reg) +{ + return MIPS_FPREG(reg) ? ®_pool_->FPRegs[reg & MIPS_FP_REG_MASK] + : ®_pool_->core_regs[reg]; +} + +/* To be used when explicitly managing register use */ +void MipsMir2Lir::LockCallTemps() +{ + LockTemp(rMIPS_ARG0); + LockTemp(rMIPS_ARG1); + LockTemp(rMIPS_ARG2); + LockTemp(rMIPS_ARG3); +} + +/* To be used when explicitly managing register use */ +void MipsMir2Lir::FreeCallTemps() +{ + FreeTemp(rMIPS_ARG0); + FreeTemp(rMIPS_ARG1); + FreeTemp(rMIPS_ARG2); + FreeTemp(rMIPS_ARG3); +} + +void MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) +{ +#if ANDROID_SMP != 0 + NewLIR1(kMipsSync, 0 /* Only stype currently supported */); +#endif +} + +/* + * Alloc a pair of core registers, or a double. Low reg in low byte, + * high reg in next byte. 
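+ * i.e. the result packs the pair as (low & 0xff) | ((high & 0xff) << 8),
+ * so callers recover the halves with (res & 0xff) and ((res >> 8) & 0xff).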
+ */ +int MipsMir2Lir::AllocTypedTempPair(bool fp_hint, + int reg_class) +{ + int high_reg; + int low_reg; + int res = 0; + + if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { + low_reg = AllocTempDouble(); + high_reg = low_reg + 1; + res = (low_reg & 0xff) | ((high_reg & 0xff) << 8); + return res; + } + + low_reg = AllocTemp(); + high_reg = AllocTemp(); + res = (low_reg & 0xff) | ((high_reg & 0xff) << 8); + return res; +} + +int MipsMir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) +{ + if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) +{ + return AllocTempFloat(); +} + return AllocTemp(); +} + +void MipsMir2Lir::CompilerInitializeRegAlloc() +{ + int num_regs = sizeof(core_regs)/sizeof(*core_regs); + int num_reserved = sizeof(ReservedRegs)/sizeof(*ReservedRegs); + int num_temps = sizeof(core_temps)/sizeof(*core_temps); + int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs); + int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps); + reg_pool_ = static_cast<RegisterPool*>(arena_->NewMem(sizeof(*reg_pool_), true, + ArenaAllocator::kAllocRegAlloc)); + reg_pool_->num_core_regs = num_regs; + reg_pool_->core_regs = static_cast<RegisterInfo*> + (arena_->NewMem(num_regs * sizeof(*reg_pool_->core_regs), true, + ArenaAllocator::kAllocRegAlloc)); + reg_pool_->num_fp_regs = num_fp_regs; + reg_pool_->FPRegs = static_cast<RegisterInfo*> + (arena_->NewMem(num_fp_regs * sizeof(*reg_pool_->FPRegs), true, + ArenaAllocator::kAllocRegAlloc)); + CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs); + CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs); + // Keep special registers from being allocated + for (int i = 0; i < num_reserved; i++) { + if (NO_SUSPEND && (ReservedRegs[i] == rMIPS_SUSPEND)) { + //To measure cost of suspend check + continue; + } + MarkInUse(ReservedRegs[i]); + } + // Mark temp regs - all others not in use can be used for promotion + for (int i = 0; i < num_temps; i++) { + MarkTemp(core_temps[i]); + } + for (int i = 0; i < num_fp_temps; i++) { + MarkTemp(fp_temps[i]); + } +} + +void MipsMir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) +{ + if ((rl_free.low_reg != rl_keep.low_reg) && (rl_free.low_reg != rl_keep.high_reg) && + (rl_free.high_reg != rl_keep.low_reg) && (rl_free.high_reg != rl_keep.high_reg)) { + // No overlap, free both + FreeTemp(rl_free.low_reg); + FreeTemp(rl_free.high_reg); + } +} +/* + * In the Arm code a it is typical to use the link register + * to hold the target address. However, for Mips we must + * ensure that all branch instructions can be restarted if + * there is a trap in the shadow. Allocate a temp register. 
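+ * The helper address is loaded into r_T9 below; per the register conventions
+ * in mips_lir.h, t9 is the scratch register normally used for function calls
+ * and also serves as rMIPS_INVOKE_TGT.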
+ */ +int MipsMir2Lir::LoadHelper(int offset) +{ + LoadWordDisp(rMIPS_SELF, offset, r_T9); + return r_T9; +} + +void MipsMir2Lir::SpillCoreRegs() +{ + if (num_core_spills_ == 0) { + return; + } + uint32_t mask = core_spill_mask_; + int offset = num_core_spills_ * 4; + OpRegImm(kOpSub, rMIPS_SP, offset); + for (int reg = 0; mask; mask >>= 1, reg++) { + if (mask & 0x1) { + offset -= 4; + StoreWordDisp(rMIPS_SP, offset, reg); + } + } +} + +void MipsMir2Lir::UnSpillCoreRegs() +{ + if (num_core_spills_ == 0) { + return; + } + uint32_t mask = core_spill_mask_; + int offset = frame_size_; + for (int reg = 0; mask; mask >>= 1, reg++) { + if (mask & 0x1) { + offset -= 4; + LoadWordDisp(rMIPS_SP, offset, reg); + } + } + OpRegImm(kOpAdd, rMIPS_SP, frame_size_); +} + +bool MipsMir2Lir::IsUnconditionalBranch(LIR* lir) +{ + return (lir->opcode == kMipsB); +} + +MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) + : Mir2Lir(cu, mir_graph, arena) { + for (int i = 0; i < kMipsLast; i++) { + if (MipsMir2Lir::EncodingMap[i].opcode != i) { + LOG(FATAL) << "Encoding order for " << MipsMir2Lir::EncodingMap[i].name + << " is wrong: expecting " << i << ", seeing " + << static_cast<int>(MipsMir2Lir::EncodingMap[i].opcode); + } + } +} + +Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, + ArenaAllocator* const arena) { + return new MipsMir2Lir(cu, mir_graph, arena); +} + +uint64_t MipsMir2Lir::GetTargetInstFlags(int opcode) +{ + return MipsMir2Lir::EncodingMap[opcode].flags; +} + +const char* MipsMir2Lir::GetTargetInstName(int opcode) +{ + return MipsMir2Lir::EncodingMap[opcode].name; +} + +const char* MipsMir2Lir::GetTargetInstFmt(int opcode) +{ + return MipsMir2Lir::EncodingMap[opcode].fmt; +} + +} // namespace art diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc new file mode 100644 index 0000000000..8daafc8d96 --- /dev/null +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -0,0 +1,700 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "codegen_mips.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "mips_lir.h" + +namespace art { + +/* This file contains codegen for the MIPS32 ISA. 
*/ +LIR* MipsMir2Lir::OpFpRegCopy(int r_dest, int r_src) +{ + int opcode; + /* must be both DOUBLE or both not DOUBLE */ + DCHECK_EQ(MIPS_DOUBLEREG(r_dest),MIPS_DOUBLEREG(r_src)); + if (MIPS_DOUBLEREG(r_dest)) { + opcode = kMipsFmovd; + } else { + if (MIPS_SINGLEREG(r_dest)) { + if (MIPS_SINGLEREG(r_src)) { + opcode = kMipsFmovs; + } else { + /* note the operands are swapped for the mtc1 instr */ + int t_opnd = r_src; + r_src = r_dest; + r_dest = t_opnd; + opcode = kMipsMtc1; + } + } else { + DCHECK(MIPS_SINGLEREG(r_src)); + opcode = kMipsMfc1; + } + } + LIR* res = RawLIR(current_dalvik_offset_, opcode, r_src, r_dest); + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { + res->flags.is_nop = true; + } + return res; +} + +bool MipsMir2Lir::InexpensiveConstantInt(int32_t value) +{ + return ((value == 0) || IsUint(16, value) || ((value < 0) && (value >= -32768))); +} + +bool MipsMir2Lir::InexpensiveConstantFloat(int32_t value) +{ + return false; // TUNING +} + +bool MipsMir2Lir::InexpensiveConstantLong(int64_t value) +{ + return false; // TUNING +} + +bool MipsMir2Lir::InexpensiveConstantDouble(int64_t value) +{ + return false; // TUNING +} + +/* + * Load a immediate using a shortcut if possible; otherwise + * grab from the per-translation literal pool. If target is + * a high register, build constant into a low register and copy. + * + * No additional register clobbering operation performed. Use this version when + * 1) r_dest is freshly returned from AllocTemp or + * 2) The codegen is under fixed register usage + */ +LIR* MipsMir2Lir::LoadConstantNoClobber(int r_dest, int value) +{ + LIR *res; + + int r_dest_save = r_dest; + int is_fp_reg = MIPS_FPREG(r_dest); + if (is_fp_reg) { + DCHECK(MIPS_SINGLEREG(r_dest)); + r_dest = AllocTemp(); + } + + /* See if the value can be constructed cheaply */ + if (value == 0) { + res = NewLIR2(kMipsMove, r_dest, r_ZERO); + } else if ((value > 0) && (value <= 65535)) { + res = NewLIR3(kMipsOri, r_dest, r_ZERO, value); + } else if ((value < 0) && (value >= -32768)) { + res = NewLIR3(kMipsAddiu, r_dest, r_ZERO, value); + } else { + res = NewLIR2(kMipsLui, r_dest, value>>16); + if (value & 0xffff) + NewLIR3(kMipsOri, r_dest, r_dest, value); + } + + if (is_fp_reg) { + NewLIR2(kMipsMtc1, r_dest, r_dest_save); + FreeTemp(r_dest); + } + + return res; +} + +LIR* MipsMir2Lir::OpUnconditionalBranch(LIR* target) +{ + LIR* res = NewLIR1(kMipsB, 0 /* offset to be patched during assembly*/ ); + res->target = target; + return res; +} + +LIR* MipsMir2Lir::OpReg(OpKind op, int r_dest_src) +{ + MipsOpCode opcode = kMipsNop; + switch (op) { + case kOpBlx: + opcode = kMipsJalr; + break; + case kOpBx: + return NewLIR1(kMipsJr, r_dest_src); + break; + default: + LOG(FATAL) << "Bad case in OpReg"; + } + return NewLIR2(opcode, r_RA, r_dest_src); +} + +LIR* MipsMir2Lir::OpRegImm(OpKind op, int r_dest_src1, + int value) +{ + LIR *res; + bool neg = (value < 0); + int abs_value = (neg) ? 
-value : value; + bool short_form = (abs_value & 0xff) == abs_value; + MipsOpCode opcode = kMipsNop; + switch (op) { + case kOpAdd: + return OpRegRegImm(op, r_dest_src1, r_dest_src1, value); + break; + case kOpSub: + return OpRegRegImm(op, r_dest_src1, r_dest_src1, value); + break; + default: + LOG(FATAL) << "Bad case in OpRegImm"; + break; + } + if (short_form) + res = NewLIR2(opcode, r_dest_src1, abs_value); + else { + int r_scratch = AllocTemp(); + res = LoadConstant(r_scratch, value); + if (op == kOpCmp) + NewLIR2(opcode, r_dest_src1, r_scratch); + else + NewLIR3(opcode, r_dest_src1, r_dest_src1, r_scratch); + } + return res; +} + +LIR* MipsMir2Lir::OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2) +{ + MipsOpCode opcode = kMipsNop; + switch (op) { + case kOpAdd: + opcode = kMipsAddu; + break; + case kOpSub: + opcode = kMipsSubu; + break; + case kOpAnd: + opcode = kMipsAnd; + break; + case kOpMul: + opcode = kMipsMul; + break; + case kOpOr: + opcode = kMipsOr; + break; + case kOpXor: + opcode = kMipsXor; + break; + case kOpLsl: + opcode = kMipsSllv; + break; + case kOpLsr: + opcode = kMipsSrlv; + break; + case kOpAsr: + opcode = kMipsSrav; + break; + case kOpAdc: + case kOpSbc: + LOG(FATAL) << "No carry bit on MIPS"; + break; + default: + LOG(FATAL) << "bad case in OpRegRegReg"; + break; + } + return NewLIR3(opcode, r_dest, r_src1, r_src2); +} + +LIR* MipsMir2Lir::OpRegRegImm(OpKind op, int r_dest, int r_src1, int value) +{ + LIR *res; + MipsOpCode opcode = kMipsNop; + bool short_form = true; + + switch (op) { + case kOpAdd: + if (IS_SIMM16(value)) { + opcode = kMipsAddiu; + } + else { + short_form = false; + opcode = kMipsAddu; + } + break; + case kOpSub: + if (IS_SIMM16((-value))) { + value = -value; + opcode = kMipsAddiu; + } + else { + short_form = false; + opcode = kMipsSubu; + } + break; + case kOpLsl: + DCHECK(value >= 0 && value <= 31); + opcode = kMipsSll; + break; + case kOpLsr: + DCHECK(value >= 0 && value <= 31); + opcode = kMipsSrl; + break; + case kOpAsr: + DCHECK(value >= 0 && value <= 31); + opcode = kMipsSra; + break; + case kOpAnd: + if (IS_UIMM16((value))) { + opcode = kMipsAndi; + } + else { + short_form = false; + opcode = kMipsAnd; + } + break; + case kOpOr: + if (IS_UIMM16((value))) { + opcode = kMipsOri; + } + else { + short_form = false; + opcode = kMipsOr; + } + break; + case kOpXor: + if (IS_UIMM16((value))) { + opcode = kMipsXori; + } + else { + short_form = false; + opcode = kMipsXor; + } + break; + case kOpMul: + short_form = false; + opcode = kMipsMul; + break; + default: + LOG(FATAL) << "Bad case in OpRegRegImm"; + break; + } + + if (short_form) + res = NewLIR3(opcode, r_dest, r_src1, value); + else { + if (r_dest != r_src1) { + res = LoadConstant(r_dest, value); + NewLIR3(opcode, r_dest, r_src1, r_dest); + } else { + int r_scratch = AllocTemp(); + res = LoadConstant(r_scratch, value); + NewLIR3(opcode, r_dest, r_src1, r_scratch); + } + } + return res; +} + +LIR* MipsMir2Lir::OpRegReg(OpKind op, int r_dest_src1, int r_src2) +{ + MipsOpCode opcode = kMipsNop; + LIR *res; + switch (op) { + case kOpMov: + opcode = kMipsMove; + break; + case kOpMvn: + return NewLIR3(kMipsNor, r_dest_src1, r_src2, r_ZERO); + case kOpNeg: + return NewLIR3(kMipsSubu, r_dest_src1, r_ZERO, r_src2); + case kOpAdd: + case kOpAnd: + case kOpMul: + case kOpOr: + case kOpSub: + case kOpXor: + return OpRegRegReg(op, r_dest_src1, r_dest_src1, r_src2); + case kOp2Byte: +#if __mips_isa_rev>=2 + res = NewLIR2(kMipsSeb, r_dest_src1, r_src2); +#else + res = OpRegRegImm(kOpLsl, 
r_dest_src1, r_src2, 24); + OpRegRegImm(kOpAsr, r_dest_src1, r_dest_src1, 24); +#endif + return res; + case kOp2Short: +#if __mips_isa_rev>=2 + res = NewLIR2(kMipsSeh, r_dest_src1, r_src2); +#else + res = OpRegRegImm(kOpLsl, r_dest_src1, r_src2, 16); + OpRegRegImm(kOpAsr, r_dest_src1, r_dest_src1, 16); +#endif + return res; + case kOp2Char: + return NewLIR3(kMipsAndi, r_dest_src1, r_src2, 0xFFFF); + default: + LOG(FATAL) << "Bad case in OpRegReg"; + break; + } + return NewLIR2(opcode, r_dest_src1, r_src2); +} + +LIR* MipsMir2Lir::LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value) +{ + LIR *res; + res = LoadConstantNoClobber(r_dest_lo, Low32Bits(value)); + LoadConstantNoClobber(r_dest_hi, High32Bits(value)); + return res; +} + +/* Load value from base + scaled index. */ +LIR* MipsMir2Lir::LoadBaseIndexed(int rBase, int r_index, int r_dest, + int scale, OpSize size) +{ + LIR *first = NULL; + LIR *res; + MipsOpCode opcode = kMipsNop; + int t_reg = AllocTemp(); + + if (MIPS_FPREG(r_dest)) { + DCHECK(MIPS_SINGLEREG(r_dest)); + DCHECK((size == kWord) || (size == kSingle)); + size = kSingle; + } else { + if (size == kSingle) + size = kWord; + } + + if (!scale) { + first = NewLIR3(kMipsAddu, t_reg , rBase, r_index); + } else { + first = OpRegRegImm(kOpLsl, t_reg, r_index, scale); + NewLIR3(kMipsAddu, t_reg , rBase, t_reg); + } + + switch (size) { + case kSingle: + opcode = kMipsFlwc1; + break; + case kWord: + opcode = kMipsLw; + break; + case kUnsignedHalf: + opcode = kMipsLhu; + break; + case kSignedHalf: + opcode = kMipsLh; + break; + case kUnsignedByte: + opcode = kMipsLbu; + break; + case kSignedByte: + opcode = kMipsLb; + break; + default: + LOG(FATAL) << "Bad case in LoadBaseIndexed"; + } + + res = NewLIR3(opcode, r_dest, 0, t_reg); + FreeTemp(t_reg); + return (first) ? first : res; +} + +/* store value base base + scaled index. */ +LIR* MipsMir2Lir::StoreBaseIndexed(int rBase, int r_index, int r_src, + int scale, OpSize size) +{ + LIR *first = NULL; + MipsOpCode opcode = kMipsNop; + int r_new_index = r_index; + int t_reg = AllocTemp(); + + if (MIPS_FPREG(r_src)) { + DCHECK(MIPS_SINGLEREG(r_src)); + DCHECK((size == kWord) || (size == kSingle)); + size = kSingle; + } else { + if (size == kSingle) + size = kWord; + } + + if (!scale) { + first = NewLIR3(kMipsAddu, t_reg , rBase, r_index); + } else { + first = OpRegRegImm(kOpLsl, t_reg, r_index, scale); + NewLIR3(kMipsAddu, t_reg , rBase, t_reg); + } + + switch (size) { + case kSingle: + opcode = kMipsFswc1; + break; + case kWord: + opcode = kMipsSw; + break; + case kUnsignedHalf: + case kSignedHalf: + opcode = kMipsSh; + break; + case kUnsignedByte: + case kSignedByte: + opcode = kMipsSb; + break; + default: + LOG(FATAL) << "Bad case in StoreBaseIndexed"; + } + NewLIR3(opcode, r_src, 0, t_reg); + FreeTemp(r_new_index); + return first; +} + +LIR* MipsMir2Lir::LoadBaseDispBody(int rBase, int displacement, int r_dest, + int r_dest_hi, OpSize size, int s_reg) +/* + * Load value from base + displacement. Optionally perform null check + * on base (which must have an associated s_reg and MIR). If not + * performing null check, incoming MIR can be null. IMPORTANT: this + * code must not allocate any new temps. If a new register is needed + * and base and dest are the same, spill some other register to + * rlp and then restore. 
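+ * For kLong/kDouble the value is loaded as two 32-bit words, at
+ * displacement + LOWORD_OFFSET and displacement + HIWORD_OFFSET, so the
+ * same code serves both endian configurations.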
+ */ +{ + LIR *res; + LIR *load = NULL; + LIR *load2 = NULL; + MipsOpCode opcode = kMipsNop; + bool short_form = IS_SIMM16(displacement); + bool pair = false; + + switch (size) { + case kLong: + case kDouble: + pair = true; + opcode = kMipsLw; + if (MIPS_FPREG(r_dest)) { + opcode = kMipsFlwc1; + if (MIPS_DOUBLEREG(r_dest)) { + r_dest = r_dest - MIPS_FP_DOUBLE; + } else { + DCHECK(MIPS_FPREG(r_dest_hi)); + DCHECK(r_dest == (r_dest_hi - 1)); + } + r_dest_hi = r_dest + 1; + } + short_form = IS_SIMM16_2WORD(displacement); + DCHECK_EQ((displacement & 0x3), 0); + break; + case kWord: + case kSingle: + opcode = kMipsLw; + if (MIPS_FPREG(r_dest)) { + opcode = kMipsFlwc1; + DCHECK(MIPS_SINGLEREG(r_dest)); + } + DCHECK_EQ((displacement & 0x3), 0); + break; + case kUnsignedHalf: + opcode = kMipsLhu; + DCHECK_EQ((displacement & 0x1), 0); + break; + case kSignedHalf: + opcode = kMipsLh; + DCHECK_EQ((displacement & 0x1), 0); + break; + case kUnsignedByte: + opcode = kMipsLbu; + break; + case kSignedByte: + opcode = kMipsLb; + break; + default: + LOG(FATAL) << "Bad case in LoadBaseIndexedBody"; + } + + if (short_form) { + if (!pair) { + load = res = NewLIR3(opcode, r_dest, displacement, rBase); + } else { + load = res = NewLIR3(opcode, r_dest, + displacement + LOWORD_OFFSET, rBase); + load2 = NewLIR3(opcode, r_dest_hi, + displacement + HIWORD_OFFSET, rBase); + } + } else { + if (pair) { + int r_tmp = AllocFreeTemp(); + res = OpRegRegImm(kOpAdd, r_tmp, rBase, displacement); + load = NewLIR3(opcode, r_dest, LOWORD_OFFSET, r_tmp); + load2 = NewLIR3(opcode, r_dest_hi, HIWORD_OFFSET, r_tmp); + FreeTemp(r_tmp); + } else { + int r_tmp = (rBase == r_dest) ? AllocFreeTemp() : r_dest; + res = OpRegRegImm(kOpAdd, r_tmp, rBase, displacement); + load = NewLIR3(opcode, r_dest, 0, r_tmp); + if (r_tmp != r_dest) + FreeTemp(r_tmp); + } + } + + if (rBase == rMIPS_SP) { + AnnotateDalvikRegAccess(load, (displacement + (pair ? 
LOWORD_OFFSET : 0)) >> 2, + true /* is_load */, pair /* is64bit */); + if (pair) { + AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2, + true /* is_load */, pair /* is64bit */); + } + } + return load; +} + +LIR* MipsMir2Lir::LoadBaseDisp(int rBase, int displacement, int r_dest, + OpSize size, int s_reg) +{ + return LoadBaseDispBody(rBase, displacement, r_dest, -1, + size, s_reg); +} + +LIR* MipsMir2Lir::LoadBaseDispWide(int rBase, int displacement, + int r_dest_lo, int r_dest_hi, int s_reg) +{ + return LoadBaseDispBody(rBase, displacement, r_dest_lo, r_dest_hi, kLong, s_reg); +} + +LIR* MipsMir2Lir::StoreBaseDispBody(int rBase, int displacement, + int r_src, int r_src_hi, OpSize size) +{ + LIR *res; + LIR *store = NULL; + LIR *store2 = NULL; + MipsOpCode opcode = kMipsNop; + bool short_form = IS_SIMM16(displacement); + bool pair = false; + + switch (size) { + case kLong: + case kDouble: + pair = true; + opcode = kMipsSw; + if (MIPS_FPREG(r_src)) { + opcode = kMipsFswc1; + if (MIPS_DOUBLEREG(r_src)) { + r_src = r_src - MIPS_FP_DOUBLE; + } else { + DCHECK(MIPS_FPREG(r_src_hi)); + DCHECK_EQ(r_src, (r_src_hi - 1)); + } + r_src_hi = r_src + 1; + } + short_form = IS_SIMM16_2WORD(displacement); + DCHECK_EQ((displacement & 0x3), 0); + break; + case kWord: + case kSingle: + opcode = kMipsSw; + if (MIPS_FPREG(r_src)) { + opcode = kMipsFswc1; + DCHECK(MIPS_SINGLEREG(r_src)); + } + DCHECK_EQ((displacement & 0x3), 0); + break; + case kUnsignedHalf: + case kSignedHalf: + opcode = kMipsSh; + DCHECK_EQ((displacement & 0x1), 0); + break; + case kUnsignedByte: + case kSignedByte: + opcode = kMipsSb; + break; + default: + LOG(FATAL) << "Bad case in StoreBaseIndexedBody"; + } + + if (short_form) { + if (!pair) { + store = res = NewLIR3(opcode, r_src, displacement, rBase); + } else { + store = res = NewLIR3(opcode, r_src, displacement + LOWORD_OFFSET, + rBase); + store2 = NewLIR3(opcode, r_src_hi, displacement + HIWORD_OFFSET, + rBase); + } + } else { + int r_scratch = AllocTemp(); + res = OpRegRegImm(kOpAdd, r_scratch, rBase, displacement); + if (!pair) { + store = NewLIR3(opcode, r_src, 0, r_scratch); + } else { + store = NewLIR3(opcode, r_src, LOWORD_OFFSET, r_scratch); + store2 = NewLIR3(opcode, r_src_hi, HIWORD_OFFSET, r_scratch); + } + FreeTemp(r_scratch); + } + + if (rBase == rMIPS_SP) { + AnnotateDalvikRegAccess(store, (displacement + (pair ? 
LOWORD_OFFSET : 0)) >> 2, + false /* is_load */, pair /* is64bit */); + if (pair) { + AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2, + false /* is_load */, pair /* is64bit */); + } + } + + return res; +} + +LIR* MipsMir2Lir::StoreBaseDisp(int rBase, int displacement, int r_src, + OpSize size) +{ + return StoreBaseDispBody(rBase, displacement, r_src, -1, size); +} + +LIR* MipsMir2Lir::StoreBaseDispWide(int rBase, int displacement, + int r_src_lo, int r_src_hi) +{ + return StoreBaseDispBody(rBase, displacement, r_src_lo, r_src_hi, kLong); +} + +LIR* MipsMir2Lir::OpThreadMem(OpKind op, int thread_offset) +{ + LOG(FATAL) << "Unexpected use of OpThreadMem for MIPS"; + return NULL; +} + +LIR* MipsMir2Lir::OpMem(OpKind op, int rBase, int disp) +{ + LOG(FATAL) << "Unexpected use of OpMem for MIPS"; + return NULL; +} + +LIR* MipsMir2Lir::StoreBaseIndexedDisp( int rBase, int r_index, int scale, int displacement, + int r_src, int r_src_hi, OpSize size, int s_reg) +{ + LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for MIPS"; + return NULL; +} + +LIR* MipsMir2Lir::OpRegMem(OpKind op, int r_dest, int rBase, + int offset) +{ + LOG(FATAL) << "Unexpected use of OpRegMem for MIPS"; + return NULL; +} + +LIR* MipsMir2Lir::LoadBaseIndexedDisp(int rBase, int r_index, int scale, int displacement, + int r_dest, int r_dest_hi, OpSize size, int s_reg) +{ + LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for MIPS"; + return NULL; +} + +LIR* MipsMir2Lir::OpCondBranch(ConditionCode cc, LIR* target) +{ + LOG(FATAL) << "Unexpected use of OpCondBranch for MIPS"; + return NULL; +} + +} // namespace art diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h new file mode 100644 index 0000000000..4eef264a0f --- /dev/null +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_SRC_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_ +#define ART_SRC_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_ + +#include "mir_to_lir.h" + +#include "dex/compiler_internals.h" + +namespace art { + +/* Mark a temp register as dead. Does not affect allocation state. 
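+ * If the register is one half of a pair, its partner is clobbered as well,
+ * so a wide value never ends up with only one live half.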
*/ +inline void Mir2Lir::ClobberBody(RegisterInfo* p) { + if (p->is_temp) { + DCHECK(!(p->live && p->dirty)) << "Live & dirty temp in clobber"; + p->live = false; + p->s_reg = INVALID_SREG; + p->def_start = NULL; + p->def_end = NULL; + if (p->pair) { + p->pair = false; + Clobber(p->partner); + } + } +} + +inline LIR* Mir2Lir::RawLIR(int dalvik_offset, int opcode, int op0, + int op1, int op2, int op3, int op4, LIR* target) { + LIR* insn = static_cast<LIR*>(arena_->NewMem(sizeof(LIR), true, ArenaAllocator::kAllocLIR)); + insn->dalvik_offset = dalvik_offset; + insn->opcode = opcode; + insn->operands[0] = op0; + insn->operands[1] = op1; + insn->operands[2] = op2; + insn->operands[3] = op3; + insn->operands[4] = op4; + insn->target = target; + SetupResourceMasks(insn); + if ((opcode == kPseudoTargetLabel) || (opcode == kPseudoSafepointPC) || + (opcode == kPseudoExportedPC)) { + // Always make labels scheduling barriers + insn->use_mask = insn->def_mask = ENCODE_ALL; + } + return insn; +} + +/* + * The following are building blocks to construct low-level IRs with 0 - 4 + * operands. + */ +inline LIR* Mir2Lir::NewLIR0(int opcode) { + DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & NO_OPERAND)) + << GetTargetInstName(opcode) << " " << opcode << " " + << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " + << current_dalvik_offset_; + LIR* insn = RawLIR(current_dalvik_offset_, opcode); + AppendLIR(insn); + return insn; +} + +inline LIR* Mir2Lir::NewLIR1(int opcode, int dest) { + DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP)) + << GetTargetInstName(opcode) << " " << opcode << " " + << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " + << current_dalvik_offset_; + LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest); + AppendLIR(insn); + return insn; +} + +inline LIR* Mir2Lir::NewLIR2(int opcode, int dest, int src1) { + DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP)) + << GetTargetInstName(opcode) << " " << opcode << " " + << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " + << current_dalvik_offset_; + LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest, src1); + AppendLIR(insn); + return insn; +} + +inline LIR* Mir2Lir::NewLIR3(int opcode, int dest, int src1, int src2) { + DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_TERTIARY_OP)) + << GetTargetInstName(opcode) << " " << opcode << " " + << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " + << current_dalvik_offset_; + LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest, src1, src2); + AppendLIR(insn); + return insn; +} + +inline LIR* Mir2Lir::NewLIR4(int opcode, int dest, int src1, int src2, int info) { + DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_QUAD_OP)) + << GetTargetInstName(opcode) << " " << opcode << " " + << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " + << current_dalvik_offset_; + LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest, src1, src2, info); + AppendLIR(insn); + return insn; +} + +inline LIR* Mir2Lir::NewLIR5(int opcode, int dest, int src1, int src2, int info1, + int info2) { + DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_QUIN_OP)) + << GetTargetInstName(opcode) << " " << opcode << " " + << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " + << current_dalvik_offset_; + LIR* insn = RawLIR(current_dalvik_offset_, opcode, dest, src1, src2, info1, info2); + AppendLIR(insn); + return insn; +} + +/* + * Mark the corresponding bit(s). 
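+ * i.e. OR the register's resource bit(s) from GetRegMaskCommon() into the
+ * given use/def mask; a double-precision register contributes two adjacent
+ * bits, one for each single-precision half.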
+ */ +inline void Mir2Lir::SetupRegMask(uint64_t* mask, int reg) { + *mask |= GetRegMaskCommon(reg); +} + +/* + * Set up the proper fields in the resource mask + */ +inline void Mir2Lir::SetupResourceMasks(LIR* lir) { + int opcode = lir->opcode; + + if (opcode <= 0) { + lir->use_mask = lir->def_mask = 0; + return; + } + + uint64_t flags = GetTargetInstFlags(opcode); + + if (flags & NEEDS_FIXUP) { + lir->flags.pcRelFixup = true; + } + + /* Get the starting size of the instruction's template */ + lir->flags.size = GetInsnSize(lir); + + /* Set up the mask for resources that are updated */ + if (flags & (IS_LOAD | IS_STORE)) { + /* Default to heap - will catch specialized classes later */ + SetMemRefType(lir, flags & IS_LOAD, kHeapRef); + } + + /* + * Conservatively assume the branch here will call out a function that in + * turn will trash everything. + */ + if (flags & IS_BRANCH) { + lir->def_mask = lir->use_mask = ENCODE_ALL; + return; + } + + if (flags & REG_DEF0) { + SetupRegMask(&lir->def_mask, lir->operands[0]); + } + + if (flags & REG_DEF1) { + SetupRegMask(&lir->def_mask, lir->operands[1]); + } + + + if (flags & SETS_CCODES) { + lir->def_mask |= ENCODE_CCODE; + } + + if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) { + int i; + + for (i = 0; i < 4; i++) { + if (flags & (1 << (kRegUse0 + i))) { + SetupRegMask(&lir->use_mask, lir->operands[i]); + } + } + } + + if (flags & USES_CCODES) { + lir->use_mask |= ENCODE_CCODE; + } + + // Handle target-specific actions + SetupTargetResourceMasks(lir); +} + +} // namespace art + +#endif // ART_SRC_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_ diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc new file mode 100644 index 0000000000..4562482a06 --- /dev/null +++ b/compiler/dex/quick/mir_to_lir.cc @@ -0,0 +1,843 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex/compiler_internals.h" +#include "dex/dataflow_iterator-inl.h" +#include "mir_to_lir-inl.h" +#include "object_utils.h" + +namespace art { + +/* + * Target-independent code generation. Use only high-level + * load/store utilities here, or target-dependent genXX() handlers + * when necessary. + */ +void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list) +{ + RegLocation rl_src[3]; + RegLocation rl_dest = mir_graph_->GetBadLoc(); + RegLocation rl_result = mir_graph_->GetBadLoc(); + Instruction::Code opcode = mir->dalvikInsn.opcode; + int opt_flags = mir->optimization_flags; + uint32_t vB = mir->dalvikInsn.vB; + uint32_t vC = mir->dalvikInsn.vC; + + // Prep Src and Dest locations. 
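+  // The DF_U* / DF_D* attribute bits appear to indicate which of the
+  // instruction's vA/vB/vC operands are uses or defs; the *_WIDE variants
+  // consume two consecutive s_regs (hence next_sreg += 2 below).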
+ int next_sreg = 0; + int next_loc = 0; + int attrs = mir_graph_->oat_data_flow_attributes_[opcode]; + rl_src[0] = rl_src[1] = rl_src[2] = mir_graph_->GetBadLoc(); + if (attrs & DF_UA) { + if (attrs & DF_A_WIDE) { + rl_src[next_loc++] = mir_graph_->GetSrcWide(mir, next_sreg); + next_sreg+= 2; + } else { + rl_src[next_loc++] = mir_graph_->GetSrc(mir, next_sreg); + next_sreg++; + } + } + if (attrs & DF_UB) { + if (attrs & DF_B_WIDE) { + rl_src[next_loc++] = mir_graph_->GetSrcWide(mir, next_sreg); + next_sreg+= 2; + } else { + rl_src[next_loc++] = mir_graph_->GetSrc(mir, next_sreg); + next_sreg++; + } + } + if (attrs & DF_UC) { + if (attrs & DF_C_WIDE) { + rl_src[next_loc++] = mir_graph_->GetSrcWide(mir, next_sreg); + } else { + rl_src[next_loc++] = mir_graph_->GetSrc(mir, next_sreg); + } + } + if (attrs & DF_DA) { + if (attrs & DF_A_WIDE) { + rl_dest = mir_graph_->GetDestWide(mir); + } else { + rl_dest = mir_graph_->GetDest(mir); + } + } + switch (opcode) { + case Instruction::NOP: + break; + + case Instruction::MOVE_EXCEPTION: + GenMoveException(rl_dest); + break; + + case Instruction::RETURN_VOID: + if (((cu_->access_flags & kAccConstructor) != 0) && + cu_->compiler_driver->RequiresConstructorBarrier(Thread::Current(), cu_->dex_file, + cu_->class_def_idx)) { + GenMemBarrier(kStoreStore); + } + if (!mir_graph_->MethodIsLeaf()) { + GenSuspendTest(opt_flags); + } + break; + + case Instruction::RETURN: + case Instruction::RETURN_OBJECT: + if (!mir_graph_->MethodIsLeaf()) { + GenSuspendTest(opt_flags); + } + StoreValue(GetReturn(cu_->shorty[0] == 'F'), rl_src[0]); + break; + + case Instruction::RETURN_WIDE: + if (!mir_graph_->MethodIsLeaf()) { + GenSuspendTest(opt_flags); + } + StoreValueWide(GetReturnWide(cu_->shorty[0] == 'D'), rl_src[0]); + break; + + case Instruction::MOVE_RESULT_WIDE: + if (opt_flags & MIR_INLINED) + break; // Nop - combined w/ previous invoke. + StoreValueWide(rl_dest, GetReturnWide(rl_dest.fp)); + break; + + case Instruction::MOVE_RESULT: + case Instruction::MOVE_RESULT_OBJECT: + if (opt_flags & MIR_INLINED) + break; // Nop - combined w/ previous invoke. 
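+      // Not inlined: the callee's result is still in the return register(s);
+      // rl_dest.fp selects the float vs. core return location (see GetReturn()).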
+ StoreValue(rl_dest, GetReturn(rl_dest.fp)); + break; + + case Instruction::MOVE: + case Instruction::MOVE_OBJECT: + case Instruction::MOVE_16: + case Instruction::MOVE_OBJECT_16: + case Instruction::MOVE_FROM16: + case Instruction::MOVE_OBJECT_FROM16: + StoreValue(rl_dest, rl_src[0]); + break; + + case Instruction::MOVE_WIDE: + case Instruction::MOVE_WIDE_16: + case Instruction::MOVE_WIDE_FROM16: + StoreValueWide(rl_dest, rl_src[0]); + break; + + case Instruction::CONST: + case Instruction::CONST_4: + case Instruction::CONST_16: + rl_result = EvalLoc(rl_dest, kAnyReg, true); + LoadConstantNoClobber(rl_result.low_reg, vB); + StoreValue(rl_dest, rl_result); + if (vB == 0) { + Workaround7250540(rl_dest, rl_result.low_reg); + } + break; + + case Instruction::CONST_HIGH16: + rl_result = EvalLoc(rl_dest, kAnyReg, true); + LoadConstantNoClobber(rl_result.low_reg, vB << 16); + StoreValue(rl_dest, rl_result); + if (vB == 0) { + Workaround7250540(rl_dest, rl_result.low_reg); + } + break; + + case Instruction::CONST_WIDE_16: + case Instruction::CONST_WIDE_32: + rl_result = EvalLoc(rl_dest, kAnyReg, true); + LoadConstantWide(rl_result.low_reg, rl_result.high_reg, + static_cast<int64_t>(static_cast<int32_t>(vB))); + StoreValueWide(rl_dest, rl_result); + break; + + case Instruction::CONST_WIDE: + rl_result = EvalLoc(rl_dest, kAnyReg, true); + LoadConstantWide(rl_result.low_reg, rl_result.high_reg, mir->dalvikInsn.vB_wide); + StoreValueWide(rl_dest, rl_result); + break; + + case Instruction::CONST_WIDE_HIGH16: + rl_result = EvalLoc(rl_dest, kAnyReg, true); + LoadConstantWide(rl_result.low_reg, rl_result.high_reg, + static_cast<int64_t>(vB) << 48); + StoreValueWide(rl_dest, rl_result); + break; + + case Instruction::MONITOR_ENTER: + GenMonitorEnter(opt_flags, rl_src[0]); + break; + + case Instruction::MONITOR_EXIT: + GenMonitorExit(opt_flags, rl_src[0]); + break; + + case Instruction::CHECK_CAST: { + GenCheckCast(mir->offset, vB, rl_src[0]); + break; + } + case Instruction::INSTANCE_OF: + GenInstanceof(vC, rl_dest, rl_src[0]); + break; + + case Instruction::NEW_INSTANCE: + GenNewInstance(vB, rl_dest); + break; + + case Instruction::THROW: + GenThrow(rl_src[0]); + break; + + case Instruction::ARRAY_LENGTH: + int len_offset; + len_offset = mirror::Array::LengthOffset().Int32Value(); + rl_src[0] = LoadValue(rl_src[0], kCoreReg); + GenNullCheck(rl_src[0].s_reg_low, rl_src[0].low_reg, opt_flags); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + LoadWordDisp(rl_src[0].low_reg, len_offset, rl_result.low_reg); + StoreValue(rl_dest, rl_result); + break; + + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + GenConstString(vB, rl_dest); + break; + + case Instruction::CONST_CLASS: + GenConstClass(vB, rl_dest); + break; + + case Instruction::FILL_ARRAY_DATA: + GenFillArrayData(vB, rl_src[0]); + break; + + case Instruction::FILLED_NEW_ARRAY: + GenFilledNewArray(mir_graph_->NewMemCallInfo(bb, mir, kStatic, + false /* not range */)); + break; + + case Instruction::FILLED_NEW_ARRAY_RANGE: + GenFilledNewArray(mir_graph_->NewMemCallInfo(bb, mir, kStatic, + true /* range */)); + break; + + case Instruction::NEW_ARRAY: + GenNewArray(vC, rl_dest, rl_src[0]); + break; + + case Instruction::GOTO: + case Instruction::GOTO_16: + case Instruction::GOTO_32: + if (bb->taken->start_offset <= mir->offset) { + GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken->id]); + } else { + OpUnconditionalBranch(&label_list[bb->taken->id]); + } + break; + + case Instruction::PACKED_SWITCH: + GenPackedSwitch(mir, 
vB, rl_src[0]); + break; + + case Instruction::SPARSE_SWITCH: + GenSparseSwitch(mir, vB, rl_src[0]); + break; + + case Instruction::CMPL_FLOAT: + case Instruction::CMPG_FLOAT: + case Instruction::CMPL_DOUBLE: + case Instruction::CMPG_DOUBLE: + GenCmpFP(opcode, rl_dest, rl_src[0], rl_src[1]); + break; + + case Instruction::CMP_LONG: + GenCmpLong(rl_dest, rl_src[0], rl_src[1]); + break; + + case Instruction::IF_EQ: + case Instruction::IF_NE: + case Instruction::IF_LT: + case Instruction::IF_GE: + case Instruction::IF_GT: + case Instruction::IF_LE: { + LIR* taken = &label_list[bb->taken->id]; + LIR* fall_through = &label_list[bb->fall_through->id]; + bool backward_branch; + backward_branch = (bb->taken->start_offset <= mir->offset); + // Result known at compile time? + if (rl_src[0].is_const && rl_src[1].is_const) { + bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), + mir_graph_->ConstantValue(rl_src[1].orig_sreg)); + if (is_taken && backward_branch) { + GenSuspendTest(opt_flags); + } + int id = is_taken ? bb->taken->id : bb->fall_through->id; + OpUnconditionalBranch(&label_list[id]); + } else { + if (backward_branch) { + GenSuspendTest(opt_flags); + } + GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken, + fall_through); + } + break; + } + + case Instruction::IF_EQZ: + case Instruction::IF_NEZ: + case Instruction::IF_LTZ: + case Instruction::IF_GEZ: + case Instruction::IF_GTZ: + case Instruction::IF_LEZ: { + LIR* taken = &label_list[bb->taken->id]; + LIR* fall_through = &label_list[bb->fall_through->id]; + bool backward_branch; + backward_branch = (bb->taken->start_offset <= mir->offset); + // Result known at compile time? + if (rl_src[0].is_const) { + bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), 0); + if (is_taken && backward_branch) { + GenSuspendTest(opt_flags); + } + int id = is_taken ? 
bb->taken->id : bb->fall_through->id; + OpUnconditionalBranch(&label_list[id]); + } else { + if (backward_branch) { + GenSuspendTest(opt_flags); + } + GenCompareZeroAndBranch(opcode, rl_src[0], taken, fall_through); + } + break; + } + + case Instruction::AGET_WIDE: + GenArrayGet(opt_flags, kLong, rl_src[0], rl_src[1], rl_dest, 3); + break; + case Instruction::AGET: + case Instruction::AGET_OBJECT: + GenArrayGet(opt_flags, kWord, rl_src[0], rl_src[1], rl_dest, 2); + break; + case Instruction::AGET_BOOLEAN: + GenArrayGet(opt_flags, kUnsignedByte, rl_src[0], rl_src[1], rl_dest, 0); + break; + case Instruction::AGET_BYTE: + GenArrayGet(opt_flags, kSignedByte, rl_src[0], rl_src[1], rl_dest, 0); + break; + case Instruction::AGET_CHAR: + GenArrayGet(opt_flags, kUnsignedHalf, rl_src[0], rl_src[1], rl_dest, 1); + break; + case Instruction::AGET_SHORT: + GenArrayGet(opt_flags, kSignedHalf, rl_src[0], rl_src[1], rl_dest, 1); + break; + case Instruction::APUT_WIDE: + GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3); + break; + case Instruction::APUT: + GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2); + break; + case Instruction::APUT_OBJECT: + GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0], 2); + break; + case Instruction::APUT_SHORT: + case Instruction::APUT_CHAR: + GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1); + break; + case Instruction::APUT_BYTE: + case Instruction::APUT_BOOLEAN: + GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2], + rl_src[0], 0); + break; + + case Instruction::IGET_OBJECT: + GenIGet(vC, opt_flags, kWord, rl_dest, rl_src[0], false, true); + break; + + case Instruction::IGET_WIDE: + GenIGet(vC, opt_flags, kLong, rl_dest, rl_src[0], true, false); + break; + + case Instruction::IGET: + GenIGet(vC, opt_flags, kWord, rl_dest, rl_src[0], false, false); + break; + + case Instruction::IGET_CHAR: + GenIGet(vC, opt_flags, kUnsignedHalf, rl_dest, rl_src[0], false, false); + break; + + case Instruction::IGET_SHORT: + GenIGet(vC, opt_flags, kSignedHalf, rl_dest, rl_src[0], false, false); + break; + + case Instruction::IGET_BOOLEAN: + case Instruction::IGET_BYTE: + GenIGet(vC, opt_flags, kUnsignedByte, rl_dest, rl_src[0], false, false); + break; + + case Instruction::IPUT_WIDE: + GenIPut(vC, opt_flags, kLong, rl_src[0], rl_src[1], true, false); + break; + + case Instruction::IPUT_OBJECT: + GenIPut(vC, opt_flags, kWord, rl_src[0], rl_src[1], false, true); + break; + + case Instruction::IPUT: + GenIPut(vC, opt_flags, kWord, rl_src[0], rl_src[1], false, false); + break; + + case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BYTE: + GenIPut(vC, opt_flags, kUnsignedByte, rl_src[0], rl_src[1], false, false); + break; + + case Instruction::IPUT_CHAR: + GenIPut(vC, opt_flags, kUnsignedHalf, rl_src[0], rl_src[1], false, false); + break; + + case Instruction::IPUT_SHORT: + GenIPut(vC, opt_flags, kSignedHalf, rl_src[0], rl_src[1], false, false); + break; + + case Instruction::SGET_OBJECT: + GenSget(vB, rl_dest, false, true); + break; + case Instruction::SGET: + case Instruction::SGET_BOOLEAN: + case Instruction::SGET_BYTE: + case Instruction::SGET_CHAR: + case Instruction::SGET_SHORT: + GenSget(vB, rl_dest, false, false); + break; + + case Instruction::SGET_WIDE: + GenSget(vB, rl_dest, true, false); + break; + + case Instruction::SPUT_OBJECT: + GenSput(vB, rl_src[0], false, true); + break; + + case Instruction::SPUT: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case Instruction::SPUT_CHAR: + 
case Instruction::SPUT_SHORT: + GenSput(vB, rl_src[0], false, false); + break; + + case Instruction::SPUT_WIDE: + GenSput(vB, rl_src[0], true, false); + break; + + case Instruction::INVOKE_STATIC_RANGE: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, true)); + break; + case Instruction::INVOKE_STATIC: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, false)); + break; + + case Instruction::INVOKE_DIRECT: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, false)); + break; + case Instruction::INVOKE_DIRECT_RANGE: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, true)); + break; + + case Instruction::INVOKE_VIRTUAL: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, false)); + break; + case Instruction::INVOKE_VIRTUAL_RANGE: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, true)); + break; + + case Instruction::INVOKE_SUPER: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, false)); + break; + case Instruction::INVOKE_SUPER_RANGE: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, true)); + break; + + case Instruction::INVOKE_INTERFACE: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, false)); + break; + case Instruction::INVOKE_INTERFACE_RANGE: + GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, true)); + break; + + case Instruction::NEG_INT: + case Instruction::NOT_INT: + GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[0]); + break; + + case Instruction::NEG_LONG: + case Instruction::NOT_LONG: + GenArithOpLong(opcode, rl_dest, rl_src[0], rl_src[0]); + break; + + case Instruction::NEG_FLOAT: + GenArithOpFloat(opcode, rl_dest, rl_src[0], rl_src[0]); + break; + + case Instruction::NEG_DOUBLE: + GenArithOpDouble(opcode, rl_dest, rl_src[0], rl_src[0]); + break; + + case Instruction::INT_TO_LONG: + GenIntToLong(rl_dest, rl_src[0]); + break; + + case Instruction::LONG_TO_INT: + rl_src[0] = UpdateLocWide(rl_src[0]); + rl_src[0] = WideToNarrow(rl_src[0]); + StoreValue(rl_dest, rl_src[0]); + break; + + case Instruction::INT_TO_BYTE: + case Instruction::INT_TO_SHORT: + case Instruction::INT_TO_CHAR: + GenIntNarrowing(opcode, rl_dest, rl_src[0]); + break; + + case Instruction::INT_TO_FLOAT: + case Instruction::INT_TO_DOUBLE: + case Instruction::LONG_TO_FLOAT: + case Instruction::LONG_TO_DOUBLE: + case Instruction::FLOAT_TO_INT: + case Instruction::FLOAT_TO_LONG: + case Instruction::FLOAT_TO_DOUBLE: + case Instruction::DOUBLE_TO_INT: + case Instruction::DOUBLE_TO_LONG: + case Instruction::DOUBLE_TO_FLOAT: + GenConversion(opcode, rl_dest, rl_src[0]); + break; + + + case Instruction::ADD_INT: + case Instruction::ADD_INT_2ADDR: + case Instruction::MUL_INT: + case Instruction::MUL_INT_2ADDR: + case Instruction::AND_INT: + case Instruction::AND_INT_2ADDR: + case Instruction::OR_INT: + case Instruction::OR_INT_2ADDR: + case Instruction::XOR_INT: + case Instruction::XOR_INT_2ADDR: + if (rl_src[0].is_const && + InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[0]))) { + GenArithOpIntLit(opcode, rl_dest, rl_src[1], + mir_graph_->ConstantValue(rl_src[0].orig_sreg)); + } else if (rl_src[1].is_const && + InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]))) { + GenArithOpIntLit(opcode, rl_dest, rl_src[0], + mir_graph_->ConstantValue(rl_src[1].orig_sreg)); + } else { + GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1]); + } + break; + + case Instruction::SUB_INT: + case Instruction::SUB_INT_2ADDR: + case Instruction::DIV_INT: + case Instruction::DIV_INT_2ADDR: + case Instruction::REM_INT: + case Instruction::REM_INT_2ADDR: 
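+    // (Non-commutative ops and shifts: only a constant second operand can be
+    //  folded into the literal form below.)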
+ case Instruction::SHL_INT: + case Instruction::SHL_INT_2ADDR: + case Instruction::SHR_INT: + case Instruction::SHR_INT_2ADDR: + case Instruction::USHR_INT: + case Instruction::USHR_INT_2ADDR: + if (rl_src[1].is_const && + InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]))) { + GenArithOpIntLit(opcode, rl_dest, rl_src[0], mir_graph_->ConstantValue(rl_src[1])); + } else { + GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1]); + } + break; + + case Instruction::ADD_LONG: + case Instruction::SUB_LONG: + case Instruction::AND_LONG: + case Instruction::OR_LONG: + case Instruction::XOR_LONG: + case Instruction::ADD_LONG_2ADDR: + case Instruction::SUB_LONG_2ADDR: + case Instruction::AND_LONG_2ADDR: + case Instruction::OR_LONG_2ADDR: + case Instruction::XOR_LONG_2ADDR: + if (rl_src[0].is_const || rl_src[1].is_const) { + GenArithImmOpLong(opcode, rl_dest, rl_src[0], rl_src[1]); + break; + } + // Note: intentional fallthrough. + + case Instruction::MUL_LONG: + case Instruction::DIV_LONG: + case Instruction::REM_LONG: + case Instruction::MUL_LONG_2ADDR: + case Instruction::DIV_LONG_2ADDR: + case Instruction::REM_LONG_2ADDR: + GenArithOpLong(opcode, rl_dest, rl_src[0], rl_src[1]); + break; + + case Instruction::SHL_LONG: + case Instruction::SHR_LONG: + case Instruction::USHR_LONG: + case Instruction::SHL_LONG_2ADDR: + case Instruction::SHR_LONG_2ADDR: + case Instruction::USHR_LONG_2ADDR: + if (rl_src[1].is_const) { + GenShiftImmOpLong(opcode, rl_dest, rl_src[0], rl_src[1]); + } else { + GenShiftOpLong(opcode, rl_dest, rl_src[0], rl_src[1]); + } + break; + + case Instruction::ADD_FLOAT: + case Instruction::SUB_FLOAT: + case Instruction::MUL_FLOAT: + case Instruction::DIV_FLOAT: + case Instruction::REM_FLOAT: + case Instruction::ADD_FLOAT_2ADDR: + case Instruction::SUB_FLOAT_2ADDR: + case Instruction::MUL_FLOAT_2ADDR: + case Instruction::DIV_FLOAT_2ADDR: + case Instruction::REM_FLOAT_2ADDR: + GenArithOpFloat(opcode, rl_dest, rl_src[0], rl_src[1]); + break; + + case Instruction::ADD_DOUBLE: + case Instruction::SUB_DOUBLE: + case Instruction::MUL_DOUBLE: + case Instruction::DIV_DOUBLE: + case Instruction::REM_DOUBLE: + case Instruction::ADD_DOUBLE_2ADDR: + case Instruction::SUB_DOUBLE_2ADDR: + case Instruction::MUL_DOUBLE_2ADDR: + case Instruction::DIV_DOUBLE_2ADDR: + case Instruction::REM_DOUBLE_2ADDR: + GenArithOpDouble(opcode, rl_dest, rl_src[0], rl_src[1]); + break; + + case Instruction::RSUB_INT: + case Instruction::ADD_INT_LIT16: + case Instruction::MUL_INT_LIT16: + case Instruction::DIV_INT_LIT16: + case Instruction::REM_INT_LIT16: + case Instruction::AND_INT_LIT16: + case Instruction::OR_INT_LIT16: + case Instruction::XOR_INT_LIT16: + case Instruction::ADD_INT_LIT8: + case Instruction::RSUB_INT_LIT8: + case Instruction::MUL_INT_LIT8: + case Instruction::DIV_INT_LIT8: + case Instruction::REM_INT_LIT8: + case Instruction::AND_INT_LIT8: + case Instruction::OR_INT_LIT8: + case Instruction::XOR_INT_LIT8: + case Instruction::SHL_INT_LIT8: + case Instruction::SHR_INT_LIT8: + case Instruction::USHR_INT_LIT8: + GenArithOpIntLit(opcode, rl_dest, rl_src[0], vC); + break; + + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } +} + +// Process extended MIR instructions +void Mir2Lir::HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir) +{ + switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) { + case kMirOpCopy: { + RegLocation rl_src = mir_graph_->GetSrc(mir, 0); + RegLocation rl_dest = mir_graph_->GetDest(mir); + StoreValue(rl_dest, rl_src); + break; + } + case 
kMirOpFusedCmplFloat: + GenFusedFPCmpBranch(bb, mir, false /*gt bias*/, false /*double*/); + break; + case kMirOpFusedCmpgFloat: + GenFusedFPCmpBranch(bb, mir, true /*gt bias*/, false /*double*/); + break; + case kMirOpFusedCmplDouble: + GenFusedFPCmpBranch(bb, mir, false /*gt bias*/, true /*double*/); + break; + case kMirOpFusedCmpgDouble: + GenFusedFPCmpBranch(bb, mir, true /*gt bias*/, true /*double*/); + break; + case kMirOpFusedCmpLong: + GenFusedLongCmpBranch(bb, mir); + break; + case kMirOpSelect: + GenSelect(bb, mir); + break; + default: + break; + } +} + +// Handle the content in each basic block. +bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) +{ + if (bb->block_type == kDead) return false; + current_dalvik_offset_ = bb->start_offset; + MIR* mir; + int block_id = bb->id; + + block_label_list_[block_id].operands[0] = bb->start_offset; + + // Insert the block label. + block_label_list_[block_id].opcode = kPseudoNormalBlockLabel; + AppendLIR(&block_label_list_[block_id]); + + LIR* head_lir = NULL; + + // If this is a catch block, export the start address. + if (bb->catch_entry) { + head_lir = NewLIR0(kPseudoExportedPC); + } + + // Free temp registers and reset redundant store tracking. + ResetRegPool(); + ResetDefTracking(); + + ClobberAllRegs(); + + if (bb->block_type == kEntryBlock) { + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + GenEntrySequence(&mir_graph_->reg_location_[start_vreg], + mir_graph_->reg_location_[mir_graph_->GetMethodSReg()]); + } else if (bb->block_type == kExitBlock) { + GenExitSequence(); + } + + for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { + ResetRegPool(); + if (cu_->disable_opt & (1 << kTrackLiveTemps)) { + ClobberAllRegs(); + } + + if (cu_->disable_opt & (1 << kSuppressLoads)) { + ResetDefTracking(); + } + + // Reset temp tracking sanity check. + if (kIsDebugBuild) { + live_sreg_ = INVALID_SREG; + } + + current_dalvik_offset_ = mir->offset; + int opcode = mir->dalvikInsn.opcode; + LIR* boundary_lir; + + // Mark the beginning of a Dalvik instruction for line tracking. + char* inst_str = cu_->verbose ? + mir_graph_->GetDalvikDisassembly(mir) : NULL; + boundary_lir = MarkBoundary(mir->offset, inst_str); + // Remember the first LIR for this block. + if (head_lir == NULL) { + head_lir = boundary_lir; + // Set the first boundary_lir as a scheduling barrier. + head_lir->def_mask = ENCODE_ALL; + } + + if (opcode == kMirOpCheck) { + // Combine check and work halves of throwing instruction. + MIR* work_half = mir->meta.throw_insn; + mir->dalvikInsn.opcode = work_half->dalvikInsn.opcode; + opcode = work_half->dalvikInsn.opcode; + SSARepresentation* ssa_rep = work_half->ssa_rep; + work_half->ssa_rep = mir->ssa_rep; + mir->ssa_rep = ssa_rep; + work_half->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheckPart2); + } + + if (opcode >= kMirOpFirst) { + HandleExtendedMethodMIR(bb, mir); + continue; + } + + CompileDalvikInstruction(mir, bb, block_label_list_); + } + + if (head_lir) { + // Eliminate redundant loads/stores and delay stores into later slots. + ApplyLocalOptimizations(head_lir, last_lir_insn_); + + // Generate an unconditional branch to the fallthrough block. + if (bb->fall_through) { + OpUnconditionalBranch(&block_label_list_[bb->fall_through->id]); + } + } + return false; +} + +void Mir2Lir::SpecialMIR2LIR(SpecialCaseHandler special_case) +{ + // Find the first DalvikByteCode block. 
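+  // Walk the pre-order DFS list until the first kDalvikByteCode block is found;
+  // its first MIR is handed to GenSpecialCase(), which emits the code for the
+  // recognized special-case pattern instead of the normal per-block path.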
+ int num_reachable_blocks = mir_graph_->GetNumReachableBlocks(); + BasicBlock*bb = NULL; + for (int idx = 0; idx < num_reachable_blocks; idx++) { + // TODO: no direct access of growable lists. + int dfs_index = mir_graph_->GetDfsOrder()->Get(idx); + bb = mir_graph_->GetBasicBlock(dfs_index); + if (bb->block_type == kDalvikByteCode) { + break; + } + } + if (bb == NULL) { + return; + } + DCHECK_EQ(bb->start_offset, 0); + DCHECK(bb->first_mir_insn != NULL); + + // Get the first instruction. + MIR* mir = bb->first_mir_insn; + + // Free temp registers and reset redundant store tracking. + ResetRegPool(); + ResetDefTracking(); + ClobberAllRegs(); + + GenSpecialCase(bb, mir, special_case); +} + +void Mir2Lir::MethodMIR2LIR() +{ + // Hold the labels of each block. + block_label_list_ = + static_cast<LIR*>(arena_->NewMem(sizeof(LIR) * mir_graph_->GetNumBlocks(), true, + ArenaAllocator::kAllocLIR)); + + PreOrderDfsIterator iter(mir_graph_, false /* not iterative */); + for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { + MethodBlockCodeGen(bb); + } + + HandleSuspendLaunchPads(); + + HandleThrowLaunchPads(); + + HandleIntrinsicLaunchPads(); + + if (!(cu_->disable_opt & (1 << kSafeOptimizations))) { + RemoveRedundantBranches(); + } +} + +} // namespace art diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h new file mode 100644 index 0000000000..47514f769f --- /dev/null +++ b/compiler/dex/quick/mir_to_lir.h @@ -0,0 +1,779 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_SRC_COMPILER_DEX_QUICK_MIR_TO_LIR_H_ +#define ART_SRC_COMPILER_DEX_QUICK_MIR_TO_LIR_H_ + +#include "invoke_type.h" +#include "compiled_method.h" +#include "dex/compiler_enums.h" +#include "dex/compiler_ir.h" +#include "dex/backend.h" +#include "dex/growable_array.h" +#include "dex/arena_allocator.h" +#include "driver/compiler_driver.h" +#include "safe_map.h" + +namespace art { + +// Set to 1 to measure cost of suspend check. 
+#define NO_SUSPEND 0 + +#define IS_BINARY_OP (1ULL << kIsBinaryOp) +#define IS_BRANCH (1ULL << kIsBranch) +#define IS_IT (1ULL << kIsIT) +#define IS_LOAD (1ULL << kMemLoad) +#define IS_QUAD_OP (1ULL << kIsQuadOp) +#define IS_QUIN_OP (1ULL << kIsQuinOp) +#define IS_SEXTUPLE_OP (1ULL << kIsSextupleOp) +#define IS_STORE (1ULL << kMemStore) +#define IS_TERTIARY_OP (1ULL << kIsTertiaryOp) +#define IS_UNARY_OP (1ULL << kIsUnaryOp) +#define NEEDS_FIXUP (1ULL << kPCRelFixup) +#define NO_OPERAND (1ULL << kNoOperand) +#define REG_DEF0 (1ULL << kRegDef0) +#define REG_DEF1 (1ULL << kRegDef1) +#define REG_DEFA (1ULL << kRegDefA) +#define REG_DEFD (1ULL << kRegDefD) +#define REG_DEF_FPCS_LIST0 (1ULL << kRegDefFPCSList0) +#define REG_DEF_FPCS_LIST2 (1ULL << kRegDefFPCSList2) +#define REG_DEF_LIST0 (1ULL << kRegDefList0) +#define REG_DEF_LIST1 (1ULL << kRegDefList1) +#define REG_DEF_LR (1ULL << kRegDefLR) +#define REG_DEF_SP (1ULL << kRegDefSP) +#define REG_USE0 (1ULL << kRegUse0) +#define REG_USE1 (1ULL << kRegUse1) +#define REG_USE2 (1ULL << kRegUse2) +#define REG_USE3 (1ULL << kRegUse3) +#define REG_USE4 (1ULL << kRegUse4) +#define REG_USEA (1ULL << kRegUseA) +#define REG_USEC (1ULL << kRegUseC) +#define REG_USED (1ULL << kRegUseD) +#define REG_USE_FPCS_LIST0 (1ULL << kRegUseFPCSList0) +#define REG_USE_FPCS_LIST2 (1ULL << kRegUseFPCSList2) +#define REG_USE_LIST0 (1ULL << kRegUseList0) +#define REG_USE_LIST1 (1ULL << kRegUseList1) +#define REG_USE_LR (1ULL << kRegUseLR) +#define REG_USE_PC (1ULL << kRegUsePC) +#define REG_USE_SP (1ULL << kRegUseSP) +#define SETS_CCODES (1ULL << kSetsCCodes) +#define USES_CCODES (1ULL << kUsesCCodes) + +// Common combo register usage patterns. +#define REG_DEF01 (REG_DEF0 | REG_DEF1) +#define REG_DEF01_USE2 (REG_DEF0 | REG_DEF1 | REG_USE2) +#define REG_DEF0_USE01 (REG_DEF0 | REG_USE01) +#define REG_DEF0_USE0 (REG_DEF0 | REG_USE0) +#define REG_DEF0_USE12 (REG_DEF0 | REG_USE12) +#define REG_DEF0_USE1 (REG_DEF0 | REG_USE1) +#define REG_DEF0_USE2 (REG_DEF0 | REG_USE2) +#define REG_DEFAD_USEAD (REG_DEFAD_USEA | REG_USED) +#define REG_DEFAD_USEA (REG_DEFA_USEA | REG_DEFD) +#define REG_DEFA_USEA (REG_DEFA | REG_USEA) +#define REG_USE012 (REG_USE01 | REG_USE2) +#define REG_USE014 (REG_USE01 | REG_USE4) +#define REG_USE01 (REG_USE0 | REG_USE1) +#define REG_USE02 (REG_USE0 | REG_USE2) +#define REG_USE12 (REG_USE1 | REG_USE2) +#define REG_USE23 (REG_USE2 | REG_USE3) + +struct BasicBlock; +struct CallInfo; +struct CompilationUnit; +struct MIR; +struct RegLocation; +struct RegisterInfo; +class MIRGraph; +class Mir2Lir; + +typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, + const MethodReference& target_method, + uint32_t method_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type); + +typedef std::vector<uint8_t> CodeBuffer; + + +struct LIR { + int offset; // Offset of this instruction. + int dalvik_offset; // Offset of Dalvik opcode. + LIR* next; + LIR* prev; + LIR* target; + int opcode; + int operands[5]; // [0..4] = [dest, src1, src2, extra, extra2]. + struct { + bool is_nop:1; // LIR is optimized away. + bool pcRelFixup:1; // May need pc-relative fixup. + unsigned int size:5; // Note: size is in bytes. + unsigned int unused:25; + } flags; + int alias_info; // For Dalvik register & litpool disambiguation. + uint64_t use_mask; // Resource mask for use. + uint64_t def_mask; // Resource mask for def. +}; + +// Target-specific initialization. 
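+// Each factory returns the target-specific Mir2Lir subclass for the chosen
+// instruction set, e.g. (illustrative)
+//   Mir2Lir* cg = ArmCodeGenerator(cu, mir_graph, arena);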
+Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, + ArenaAllocator* const arena); +Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, + ArenaAllocator* const arena); +Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, + ArenaAllocator* const arena); + +// Utility macros to traverse the LIR list. +#define NEXT_LIR(lir) (lir->next) +#define PREV_LIR(lir) (lir->prev) + +// Defines for alias_info (tracks Dalvik register references). +#define DECODE_ALIAS_INFO_REG(X) (X & 0xffff) +#define DECODE_ALIAS_INFO_WIDE_FLAG (0x80000000) +#define DECODE_ALIAS_INFO_WIDE(X) ((X & DECODE_ALIAS_INFO_WIDE_FLAG) ? 1 : 0) +#define ENCODE_ALIAS_INFO(REG, ISWIDE) (REG | (ISWIDE ? DECODE_ALIAS_INFO_WIDE_FLAG : 0)) + +// Common resource macros. +#define ENCODE_CCODE (1ULL << kCCode) +#define ENCODE_FP_STATUS (1ULL << kFPStatus) + +// Abstract memory locations. +#define ENCODE_DALVIK_REG (1ULL << kDalvikReg) +#define ENCODE_LITERAL (1ULL << kLiteral) +#define ENCODE_HEAP_REF (1ULL << kHeapRef) +#define ENCODE_MUST_NOT_ALIAS (1ULL << kMustNotAlias) + +#define ENCODE_ALL (~0ULL) +#define ENCODE_MEM (ENCODE_DALVIK_REG | ENCODE_LITERAL | \ + ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS) +//TODO: replace these macros +#define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath)) +#define SLOW_INVOKE_PATH (cu_->enable_debug & (1 << kDebugSlowInvokePath)) +#define SLOW_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowStringPath)) +#define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath)) +#define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath)) +#define is_pseudo_opcode(opcode) (static_cast<int>(opcode) < 0) + +class Mir2Lir : public Backend { + + public: + struct SwitchTable { + int offset; + const uint16_t* table; // Original dex table. + int vaddr; // Dalvik offset of switch opcode. + LIR* anchor; // Reference instruction for relative offsets. + LIR** targets; // Array of case targets. + }; + + struct FillArrayData { + int offset; + const uint16_t* table; // Original dex table. + int size; + int vaddr; // Dalvik offset of FILL_ARRAY_DATA opcode. + }; + + /* Static register use counts */ + struct RefCounts { + int count; + int s_reg; + bool double_start; // Starting v_reg for a double + }; + + /* + * Data structure tracking the mapping between a Dalvik register (pair) and a + * native register (pair). The idea is to reuse the previously loaded value + * if possible, otherwise to keep the value in a native register as long as + * possible. + */ + struct RegisterInfo { + int reg; // Reg number + bool in_use; // Has it been allocated? + bool is_temp; // Can allocate as temp? + bool pair; // Part of a register pair? + int partner; // If pair, other reg of pair. + bool live; // Is there an associated SSA name? + bool dirty; // If live, is it dirty? + int s_reg; // Name of live value. + LIR *def_start; // Starting inst in last def sequence. + LIR *def_end; // Ending inst in last def sequence. 
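+    // A wide value held in a register pair links both entries: each has
+    // pair == true and partner naming the other reg, so ClobberBody() on one
+    // half also clobbers its partner.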
+ }; + + struct RegisterPool { + int num_core_regs; + RegisterInfo *core_regs; + int next_core_reg; + int num_fp_regs; + RegisterInfo *FPRegs; + int next_fp_reg; + }; + + struct PromotionMap { + RegLocationType core_location:3; + uint8_t core_reg; + RegLocationType fp_location:3; + uint8_t FpReg; + bool first_in_pair; + }; + + virtual ~Mir2Lir(){}; + + int32_t s4FromSwitchData(const void* switch_data) { + return *reinterpret_cast<const int32_t*>(switch_data); + } + + RegisterClass oat_reg_class_by_size(OpSize size) { + return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte || + size == kSignedByte ) ? kCoreReg : kAnyReg; + } + + size_t CodeBufferSizeInBytes() { + return code_buffer_.size() / sizeof(code_buffer_[0]); + } + + // Shared by all targets - implemented in codegen_util.cc + void AppendLIR(LIR* lir); + void InsertLIRBefore(LIR* current_lir, LIR* new_lir); + void InsertLIRAfter(LIR* current_lir, LIR* new_lir); + + int ComputeFrameSize(); + virtual void Materialize(); + virtual CompiledMethod* GetCompiledMethod(); + void MarkSafepointPC(LIR* inst); + bool FastInstance(uint32_t field_idx, int& field_offset, bool& is_volatile, bool is_put); + void SetupResourceMasks(LIR* lir); + void AssembleLIR(); + void SetMemRefType(LIR* lir, bool is_load, int mem_type); + void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit); + void SetupRegMask(uint64_t* mask, int reg); + void DumpLIRInsn(LIR* arg, unsigned char* base_addr); + void DumpPromotionMap(); + void CodegenDump(); + LIR* RawLIR(int dalvik_offset, int opcode, int op0 = 0, int op1 = 0, + int op2 = 0, int op3 = 0, int op4 = 0, LIR* target = NULL); + LIR* NewLIR0(int opcode); + LIR* NewLIR1(int opcode, int dest); + LIR* NewLIR2(int opcode, int dest, int src1); + LIR* NewLIR3(int opcode, int dest, int src1, int src2); + LIR* NewLIR4(int opcode, int dest, int src1, int src2, int info); + LIR* NewLIR5(int opcode, int dest, int src1, int src2, int info1, int info2); + LIR* ScanLiteralPool(LIR* data_target, int value, unsigned int delta); + LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi); + LIR* AddWordData(LIR* *constant_list_p, int value); + LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi); + void ProcessSwitchTables(); + void DumpSparseSwitchTable(const uint16_t* table); + void DumpPackedSwitchTable(const uint16_t* table); + LIR* MarkBoundary(int offset, const char* inst_str); + void NopLIR(LIR* lir); + bool EvaluateBranch(Instruction::Code opcode, int src1, int src2); + bool IsInexpensiveConstant(RegLocation rl_src); + ConditionCode FlipComparisonOrder(ConditionCode before); + void DumpMappingTable(const char* table_name, const std::string& descriptor, + const std::string& name, const std::string& signature, + const std::vector<uint32_t>& v); + void InstallLiteralPools(); + void InstallSwitchTables(); + void InstallFillArrayData(); + bool VerifyCatchEntries(); + void CreateMappingTables(); + void CreateNativeGcMap(); + int AssignLiteralOffset(int offset); + int AssignSwitchTablesOffset(int offset); + int AssignFillArrayDataOffset(int offset); + int AssignInsnOffsets(); + void AssignOffsets(); + LIR* InsertCaseLabel(int vaddr, int keyVal); + void MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec); + void MarkSparseCaseLabels(Mir2Lir::SwitchTable *tab_rec); + + // Shared by all targets - implemented in local_optimizations.cc + void ConvertMemOpIntoMove(LIR* orig_lir, int dest, int src); + void ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir); + void 
ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir); + void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir); + void RemoveRedundantBranches(); + + // Shared by all targets - implemented in ralloc_util.cc + int GetSRegHi(int lowSreg); + bool oat_live_out(int s_reg); + int oatSSASrc(MIR* mir, int num); + void SimpleRegAlloc(); + void ResetRegPool(); + void CompilerInitPool(RegisterInfo* regs, int* reg_nums, int num); + void DumpRegPool(RegisterInfo* p, int num_regs); + void DumpCoreRegPool(); + void DumpFpRegPool(); + /* Mark a temp register as dead. Does not affect allocation state. */ + void Clobber(int reg) { + ClobberBody(GetRegInfo(reg)); + } + void ClobberSRegBody(RegisterInfo* p, int num_regs, int s_reg); + void ClobberSReg(int s_reg); + int SRegToPMap(int s_reg); + void RecordCorePromotion(int reg, int s_reg); + int AllocPreservedCoreReg(int s_reg); + void RecordFpPromotion(int reg, int s_reg); + int AllocPreservedSingle(int s_reg, bool even); + int AllocPreservedDouble(int s_reg); + int AllocPreservedFPReg(int s_reg, bool double_start); + int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, + bool required); + int AllocTempDouble(); + int AllocFreeTemp(); + int AllocTemp(); + int AllocTempFloat(); + RegisterInfo* AllocLiveBody(RegisterInfo* p, int num_regs, int s_reg); + RegisterInfo* AllocLive(int s_reg, int reg_class); + void FreeTemp(int reg); + RegisterInfo* IsLive(int reg); + RegisterInfo* IsTemp(int reg); + RegisterInfo* IsPromoted(int reg); + bool IsDirty(int reg); + void LockTemp(int reg); + void ResetDef(int reg); + void NullifyRange(LIR *start, LIR *finish, int s_reg1, int s_reg2); + void MarkDef(RegLocation rl, LIR *start, LIR *finish); + void MarkDefWide(RegLocation rl, LIR *start, LIR *finish); + RegLocation WideToNarrow(RegLocation rl); + void ResetDefLoc(RegLocation rl); + void ResetDefLocWide(RegLocation rl); + void ResetDefTracking(); + void ClobberAllRegs(); + void FlushAllRegsBody(RegisterInfo* info, int num_regs); + void FlushAllRegs(); + bool RegClassMatches(int reg_class, int reg); + void MarkLive(int reg, int s_reg); + void MarkTemp(int reg); + void UnmarkTemp(int reg); + void MarkPair(int low_reg, int high_reg); + void MarkClean(RegLocation loc); + void MarkDirty(RegLocation loc); + void MarkInUse(int reg); + void CopyRegInfo(int new_reg, int old_reg); + bool CheckCorePoolSanity(); + RegLocation UpdateLoc(RegLocation loc); + RegLocation UpdateLocWide(RegLocation loc); + RegLocation UpdateRawLoc(RegLocation loc); + RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update); + RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts); + void DumpCounts(const RefCounts* arr, int size, const char* msg); + void DoPromotion(); + int VRegOffset(int v_reg); + int SRegOffset(int s_reg); + RegLocation GetReturnWide(bool is_double); + RegLocation GetReturn(bool is_float); + + // Shared by all targets - implemented in gen_common.cc. 
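+    // GenNullCheck()/GenImmedCheck()/GenRegRegCheck() emit a conditional branch
+    // to a throw launchpad for the given ThrowKind; the launchpads collected in
+    // throw_launchpads_ are expanded later by HandleThrowLaunchPads().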
+ bool HandleEasyDivide(Instruction::Code dalvik_opcode, + RegLocation rl_src, RegLocation rl_dest, int lit); + bool HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit); + void HandleSuspendLaunchPads(); + void HandleIntrinsicLaunchPads(); + void HandleThrowLaunchPads(); + void GenBarrier(); + LIR* GenCheck(ConditionCode c_code, ThrowKind kind); + LIR* GenImmedCheck(ConditionCode c_code, int reg, int imm_val, + ThrowKind kind); + LIR* GenNullCheck(int s_reg, int m_reg, int opt_flags); + LIR* GenRegRegCheck(ConditionCode c_code, int reg1, int reg2, + ThrowKind kind); + void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, + RegLocation rl_src2, LIR* taken, LIR* fall_through); + void GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, + LIR* taken, LIR* fall_through); + void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); + void GenIntNarrowing(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src); + void GenNewArray(uint32_t type_idx, RegLocation rl_dest, + RegLocation rl_src); + void GenFilledNewArray(CallInfo* info); + void GenSput(uint32_t field_idx, RegLocation rl_src, + bool is_long_or_double, bool is_object); + void GenSget(uint32_t field_idx, RegLocation rl_dest, + bool is_long_or_double, bool is_object); + void GenIGet(uint32_t field_idx, int opt_flags, OpSize size, + RegLocation rl_dest, RegLocation rl_obj, bool is_long_or_double, bool is_object); + void GenIPut(uint32_t field_idx, int opt_flags, OpSize size, + RegLocation rl_src, RegLocation rl_obj, bool is_long_or_double, bool is_object); + void GenConstClass(uint32_t type_idx, RegLocation rl_dest); + void GenConstString(uint32_t string_idx, RegLocation rl_dest); + void GenNewInstance(uint32_t type_idx, RegLocation rl_dest); + void GenThrow(RegLocation rl_src); + void GenInstanceof(uint32_t type_idx, RegLocation rl_dest, + RegLocation rl_src); + void GenCheckCast(uint32_t insn_idx, uint32_t type_idx, + RegLocation rl_src); + void GenLong3Addr(OpKind first_op, OpKind second_op, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift); + void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src, int lit); + void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenConversionCall(int func_offset, RegLocation rl_dest, + RegLocation rl_src); + void GenSuspendTest(int opt_flags); + void GenSuspendTestAndBranch(int opt_flags, LIR* target); + + // Shared by all targets - implemented in gen_invoke.cc. 
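+    // Naming convention: the CallRuntimeHelper* suffix spells out how each
+    // argument is materialized (Imm, Reg, RegLocation, Method) before branching
+    // to the helper at the given offset; safepoint_pc controls whether the call
+    // site is recorded as a safepoint (see MarkSafepointPC()).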
+ int CallHelperSetup(int helper_offset); + LIR* CallHelper(int r_tgt, int helper_offset, bool safepoint_pc); + void CallRuntimeHelperImm(int helper_offset, int arg0, bool safepoint_pc); + void CallRuntimeHelperReg(int helper_offset, int arg0, bool safepoint_pc); + void CallRuntimeHelperRegLocation(int helper_offset, RegLocation arg0, + bool safepoint_pc); + void CallRuntimeHelperImmImm(int helper_offset, int arg0, int arg1, + bool safepoint_pc); + void CallRuntimeHelperImmRegLocation(int helper_offset, int arg0, + RegLocation arg1, bool safepoint_pc); + void CallRuntimeHelperRegLocationImm(int helper_offset, RegLocation arg0, + int arg1, bool safepoint_pc); + void CallRuntimeHelperImmReg(int helper_offset, int arg0, int arg1, + bool safepoint_pc); + void CallRuntimeHelperRegImm(int helper_offset, int arg0, int arg1, + bool safepoint_pc); + void CallRuntimeHelperImmMethod(int helper_offset, int arg0, + bool safepoint_pc); + void CallRuntimeHelperRegLocationRegLocation(int helper_offset, + RegLocation arg0, RegLocation arg1, + bool safepoint_pc); + void CallRuntimeHelperRegReg(int helper_offset, int arg0, int arg1, + bool safepoint_pc); + void CallRuntimeHelperRegRegImm(int helper_offset, int arg0, int arg1, + int arg2, bool safepoint_pc); + void CallRuntimeHelperImmMethodRegLocation(int helper_offset, int arg0, + RegLocation arg2, bool safepoint_pc); + void CallRuntimeHelperImmMethodImm(int helper_offset, int arg0, int arg2, + bool safepoint_pc); + void CallRuntimeHelperImmRegLocationRegLocation(int helper_offset, + int arg0, RegLocation arg1, RegLocation arg2, + bool safepoint_pc); + void GenInvoke(CallInfo* info); + void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); + int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + RegLocation InlineTarget(CallInfo* info); + RegLocation InlineTargetWide(CallInfo* info); + + bool GenInlinedCharAt(CallInfo* info); + bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty); + bool GenInlinedAbsInt(CallInfo* info); + bool GenInlinedAbsLong(CallInfo* info); + bool GenInlinedFloatCvt(CallInfo* info); + bool GenInlinedDoubleCvt(CallInfo* info); + bool GenInlinedIndexOf(CallInfo* info, bool zero_based); + bool GenInlinedStringCompareTo(CallInfo* info); + bool GenInlinedCurrentThread(CallInfo* info); + bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile); + bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object, + bool is_volatile, bool is_ordered); + bool GenIntrinsic(CallInfo* info); + int LoadArgRegs(CallInfo* info, int call_state, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + + // Shared by all targets - implemented in gen_loadstore.cc. 
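+    // Typical shape of a one-result generator (illustrative sketch):
+    //   rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    //   ... compute into rl_result.low_reg ...
+    //   StoreValue(rl_dest, rl_result);  // marks the value live/dirty; the
+    //                                    // memory write-back may be deferred.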
+ RegLocation LoadCurrMethod(); + void LoadCurrMethodDirect(int r_tgt); + LIR* LoadConstant(int r_dest, int value); + LIR* LoadWordDisp(int rBase, int displacement, int r_dest); + RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind); + RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind); + void LoadValueDirect(RegLocation rl_src, int r_dest); + void LoadValueDirectFixed(RegLocation rl_src, int r_dest); + void LoadValueDirectWide(RegLocation rl_src, int reg_lo, int reg_hi); + void LoadValueDirectWideFixed(RegLocation rl_src, int reg_lo, int reg_hi); + LIR* StoreWordDisp(int rBase, int displacement, int r_src); + void StoreValue(RegLocation rl_dest, RegLocation rl_src); + void StoreValueWide(RegLocation rl_dest, RegLocation rl_src); + + // Shared by all targets - implemented in mir_to_lir.cc. + void CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list); + void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir); + bool MethodBlockCodeGen(BasicBlock* bb); + void SpecialMIR2LIR(SpecialCaseHandler special_case); + void MethodMIR2LIR(); + + + + // Required for target - codegen helpers. + virtual bool SmallLiteralDivide(Instruction::Code dalvik_opcode, + RegLocation rl_src, RegLocation rl_dest, int lit) = 0; + virtual int LoadHelper(int offset) = 0; + virtual LIR* LoadBaseDisp(int rBase, int displacement, int r_dest, OpSize size, int s_reg) = 0; + virtual LIR* LoadBaseDispWide(int rBase, int displacement, int r_dest_lo, int r_dest_hi, + int s_reg) = 0; + virtual LIR* LoadBaseIndexed(int rBase, int r_index, int r_dest, int scale, OpSize size) = 0; + virtual LIR* LoadBaseIndexedDisp(int rBase, int r_index, int scale, int displacement, + int r_dest, int r_dest_hi, OpSize size, int s_reg) = 0; + virtual LIR* LoadConstantNoClobber(int r_dest, int value) = 0; + virtual LIR* LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value) = 0; + virtual LIR* StoreBaseDisp(int rBase, int displacement, int r_src, OpSize size) = 0; + virtual LIR* StoreBaseDispWide(int rBase, int displacement, int r_src_lo, int r_src_hi) = 0; + virtual LIR* StoreBaseIndexed(int rBase, int r_index, int r_src, int scale, OpSize size) = 0; + virtual LIR* StoreBaseIndexedDisp(int rBase, int r_index, int scale, int displacement, + int r_src, int r_src_hi, OpSize size, int s_reg) = 0; + virtual void MarkGCCard(int val_reg, int tgt_addr_reg) = 0; + + // Required for target - register utilities. 
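+    // TargetReg() maps symbolic registers (stack pointer, link/return and
+    // argument registers) to the target's physical register numbers, and
+    // GetRegMaskCommon() defines the resource-mask bit used for each register.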
+ virtual bool IsFpReg(int reg) = 0; + virtual bool SameRegType(int reg1, int reg2) = 0; + virtual int AllocTypedTemp(bool fp_hint, int reg_class) = 0; + virtual int AllocTypedTempPair(bool fp_hint, int reg_class) = 0; + virtual int S2d(int low_reg, int high_reg) = 0; + virtual int TargetReg(SpecialTargetRegister reg) = 0; + virtual RegisterInfo* GetRegInfo(int reg) = 0; + virtual RegLocation GetReturnAlt() = 0; + virtual RegLocation GetReturnWideAlt() = 0; + virtual RegLocation LocCReturn() = 0; + virtual RegLocation LocCReturnDouble() = 0; + virtual RegLocation LocCReturnFloat() = 0; + virtual RegLocation LocCReturnWide() = 0; + virtual uint32_t FpRegMask() = 0; + virtual uint64_t GetRegMaskCommon(int reg) = 0; + virtual void AdjustSpillMask() = 0; + virtual void ClobberCalleeSave() = 0; + virtual void FlushReg(int reg) = 0; + virtual void FlushRegWide(int reg1, int reg2) = 0; + virtual void FreeCallTemps() = 0; + virtual void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) = 0; + virtual void LockCallTemps() = 0; + virtual void MarkPreservedSingle(int v_reg, int reg) = 0; + virtual void CompilerInitializeRegAlloc() = 0; + + // Required for target - miscellaneous. + virtual AssemblerStatus AssembleInstructions(uintptr_t start_addr) = 0; + virtual void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix) = 0; + virtual void SetupTargetResourceMasks(LIR* lir) = 0; + virtual const char* GetTargetInstFmt(int opcode) = 0; + virtual const char* GetTargetInstName(int opcode) = 0; + virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0; + virtual uint64_t GetPCUseDefEncoding() = 0; + virtual uint64_t GetTargetInstFlags(int opcode) = 0; + virtual int GetInsnSize(LIR* lir) = 0; + virtual bool IsUnconditionalBranch(LIR* lir) = 0; + + // Required for target - Dalvik-level generators. 
+ virtual void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) = 0; + virtual void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) = 0; + virtual void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) = 0; + virtual void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) = 0; + virtual void GenArithOpDouble(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) = 0; + virtual void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) = 0; + virtual void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) = 0; + virtual void GenConversion(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src) = 0; + virtual bool GenInlinedCas32(CallInfo* info, bool need_write_barrier) = 0; + virtual bool GenInlinedMinMaxInt(CallInfo* info, bool is_min) = 0; + virtual bool GenInlinedSqrt(CallInfo* info) = 0; + virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0; + virtual void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) = 0; + virtual void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) = 0; + virtual void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) = 0; + virtual LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, + int offset, ThrowKind kind) = 0; + virtual RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, + bool is_div) = 0; + virtual RegLocation GenDivRemLit(RegLocation rl_dest, int reg_lo, int lit, + bool is_div) = 0; + virtual void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) = 0; + virtual void GenDivZeroCheck(int reg_lo, int reg_hi) = 0; + virtual void GenEntrySequence(RegLocation* ArgLocs, + RegLocation rl_method) = 0; + virtual void GenExitSequence() = 0; + virtual void GenFillArrayData(uint32_t table_offset, + RegLocation rl_src) = 0; + virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, + bool is_double) = 0; + virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0; + virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0; + virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0; + virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src) = 0; + virtual void GenMonitorExit(int opt_flags, RegLocation rl_src) = 0; + virtual void GenMoveException(RegLocation rl_dest) = 0; + virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, + RegLocation rl_result, int lit, int first_bit, + int second_bit) = 0; + virtual void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) = 0; + virtual void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) = 0; + virtual void GenPackedSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) = 0; + virtual void GenSparseSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) = 0; + virtual void GenSpecialCase(BasicBlock* bb, MIR* mir, + SpecialCaseHandler special_case) = 0; + virtual void GenArrayObjPut(int opt_flags, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale) = 0; + virtual void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_dest, int scale) = 0; + virtual void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int 
scale) = 0; + virtual void GenShiftImmOpLong(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_shift) = 0; + + // Required for target - single operation generators. + virtual LIR* OpUnconditionalBranch(LIR* target) = 0; + virtual LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, + LIR* target) = 0; + virtual LIR* OpCmpImmBranch(ConditionCode cond, int reg, int check_value, + LIR* target) = 0; + virtual LIR* OpCondBranch(ConditionCode cc, LIR* target) = 0; + virtual LIR* OpDecAndBranch(ConditionCode c_code, int reg, + LIR* target) = 0; + virtual LIR* OpFpRegCopy(int r_dest, int r_src) = 0; + virtual LIR* OpIT(ConditionCode cond, const char* guide) = 0; + virtual LIR* OpMem(OpKind op, int rBase, int disp) = 0; + virtual LIR* OpPcRelLoad(int reg, LIR* target) = 0; + virtual LIR* OpReg(OpKind op, int r_dest_src) = 0; + virtual LIR* OpRegCopy(int r_dest, int r_src) = 0; + virtual LIR* OpRegCopyNoInsert(int r_dest, int r_src) = 0; + virtual LIR* OpRegImm(OpKind op, int r_dest_src1, int value) = 0; + virtual LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset) = 0; + virtual LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2) = 0; + virtual LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value) = 0; + virtual LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, + int r_src2) = 0; + virtual LIR* OpTestSuspend(LIR* target) = 0; + virtual LIR* OpThreadMem(OpKind op, int thread_offset) = 0; + virtual LIR* OpVldm(int rBase, int count) = 0; + virtual LIR* OpVstm(int rBase, int count) = 0; + virtual void OpLea(int rBase, int reg1, int reg2, int scale, + int offset) = 0; + virtual void OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, + int src_hi) = 0; + virtual void OpTlsCmp(int offset, int val) = 0; + virtual bool InexpensiveConstantInt(int32_t value) = 0; + virtual bool InexpensiveConstantFloat(int32_t value) = 0; + virtual bool InexpensiveConstantLong(int64_t value) = 0; + virtual bool InexpensiveConstantDouble(int64_t value) = 0; + + // Temp workaround + void Workaround7250540(RegLocation rl_dest, int value); + + protected: + Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); + + CompilationUnit* GetCompilationUnit() { + return cu_; + } + + private: + void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest, + RegLocation rl_src); + void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final, + bool type_known_abstract, bool use_declaring_class, + bool can_assume_type_is_in_dex_cache, + uint32_t type_idx, RegLocation rl_dest, + RegLocation rl_src); + + void ClobberBody(RegisterInfo* p); + void ResetDefBody(RegisterInfo* p) { + p->def_start = NULL; + p->def_end = NULL; + } + + public: + // TODO: add accessors for these. + LIR* literal_list_; // Constants. + LIR* method_literal_list_; // Method literals requiring patching. + LIR* code_literal_list_; // Code literals requiring patching. + + protected: + CompilationUnit* const cu_; + MIRGraph* const mir_graph_; + GrowableArray<SwitchTable*> switch_tables_; + GrowableArray<FillArrayData*> fill_array_data_; + GrowableArray<LIR*> throw_launchpads_; + GrowableArray<LIR*> suspend_launchpads_; + GrowableArray<LIR*> intrinsic_launchpads_; + SafeMap<unsigned int, LIR*> boundary_map_; // boundary lookup cache. + /* + * Holds mapping from native PC to dex PC for safepoints where we may deoptimize. + * Native PC is on the return address of the safepointed operation. 
Dex PC is for + * the instruction being executed at the safepoint. + */ + std::vector<uint32_t> pc2dex_mapping_table_; + /* + * Holds mapping from Dex PC to native PC for catch entry points. Native PC and Dex PC + * immediately preceed the instruction. + */ + std::vector<uint32_t> dex2pc_mapping_table_; + int data_offset_; // starting offset of literal pool. + int total_size_; // header + code size. + LIR* block_label_list_; + PromotionMap* promotion_map_; + /* + * TODO: The code generation utilities don't have a built-in + * mechanism to propagate the original Dalvik opcode address to the + * associated generated instructions. For the trace compiler, this wasn't + * necessary because the interpreter handled all throws and debugging + * requests. For now we'll handle this by placing the Dalvik offset + * in the CompilationUnit struct before codegen for each instruction. + * The low-level LIR creation utilites will pull it from here. Rework this. + */ + int current_dalvik_offset_; + RegisterPool* reg_pool_; + /* + * Sanity checking for the register temp tracking. The same ssa + * name should never be associated with one temp register per + * instruction compilation. + */ + int live_sreg_; + CodeBuffer code_buffer_; + std::vector<uint32_t> combined_mapping_table_; + std::vector<uint32_t> core_vmap_table_; + std::vector<uint32_t> fp_vmap_table_; + std::vector<uint8_t> native_gc_map_; + int num_core_spills_; + int num_fp_spills_; + int frame_size_; + unsigned int core_spill_mask_; + unsigned int fp_spill_mask_; + LIR* first_lir_insn_; + LIR* last_lir_insn_; +}; // Class Mir2Lir + +} // namespace art + +#endif //ART_SRC_COMPILER_DEX_QUICK_MIR_TO_LIR_H_ diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc new file mode 100644 index 0000000000..8f43542098 --- /dev/null +++ b/compiler/dex/quick/ralloc_util.cc @@ -0,0 +1,1237 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains register alloction support. */ + +#include "dex/compiler_ir.h" +#include "dex/compiler_internals.h" +#include "mir_to_lir-inl.h" + +namespace art { + +/* + * Free all allocated temps in the temp pools. Note that this does + * not affect the "liveness" of a temp register, which will stay + * live until it is either explicitly killed or reallocated. + */ +void Mir2Lir::ResetRegPool() +{ + int i; + for (i=0; i < reg_pool_->num_core_regs; i++) { + if (reg_pool_->core_regs[i].is_temp) + reg_pool_->core_regs[i].in_use = false; + } + for (i=0; i < reg_pool_->num_fp_regs; i++) { + if (reg_pool_->FPRegs[i].is_temp) + reg_pool_->FPRegs[i].in_use = false; + } + // Reset temp tracking sanity check. + if (kIsDebugBuild) { + live_sreg_ = INVALID_SREG; + } +} + + /* + * Set up temp & preserved register pools specialized by target. + * Note: num_regs may be zero. 
+ */ +void Mir2Lir::CompilerInitPool(RegisterInfo* regs, int* reg_nums, int num) +{ + int i; + for (i=0; i < num; i++) { + regs[i].reg = reg_nums[i]; + regs[i].in_use = false; + regs[i].is_temp = false; + regs[i].pair = false; + regs[i].live = false; + regs[i].dirty = false; + regs[i].s_reg = INVALID_SREG; + } +} + +void Mir2Lir::DumpRegPool(RegisterInfo* p, int num_regs) +{ + LOG(INFO) << "================================================"; + for (int i = 0; i < num_regs; i++) { + LOG(INFO) << StringPrintf( + "R[%d]: T:%d, U:%d, P:%d, p:%d, LV:%d, D:%d, SR:%d, ST:%x, EN:%x", + p[i].reg, p[i].is_temp, p[i].in_use, p[i].pair, p[i].partner, + p[i].live, p[i].dirty, p[i].s_reg, reinterpret_cast<uintptr_t>(p[i].def_start), + reinterpret_cast<uintptr_t>(p[i].def_end)); + } + LOG(INFO) << "================================================"; +} + +void Mir2Lir::DumpCoreRegPool() +{ + DumpRegPool(reg_pool_->core_regs, reg_pool_->num_core_regs); +} + +void Mir2Lir::DumpFpRegPool() +{ + DumpRegPool(reg_pool_->FPRegs, reg_pool_->num_fp_regs); +} + +void Mir2Lir::ClobberSRegBody(RegisterInfo* p, int num_regs, int s_reg) +{ + int i; + for (i=0; i< num_regs; i++) { + if (p[i].s_reg == s_reg) { + if (p[i].is_temp) { + p[i].live = false; + } + p[i].def_start = NULL; + p[i].def_end = NULL; + } + } +} + +/* + * Break the association between a Dalvik vreg and a physical temp register of either register + * class. + * TODO: Ideally, the public version of this code should not exist. Besides its local usage + * in the register utilities, is is also used by code gen routines to work around a deficiency in + * local register allocation, which fails to distinguish between the "in" and "out" identities + * of Dalvik vregs. This can result in useless register copies when the same Dalvik vreg + * is used both as the source and destination register of an operation in which the type + * changes (for example: INT_TO_FLOAT v1, v1). Revisit when improved register allocation is + * addressed. + */ +void Mir2Lir::ClobberSReg(int s_reg) +{ + /* Reset live temp tracking sanity checker */ + if (kIsDebugBuild) { + if (s_reg == live_sreg_) { + live_sreg_ = INVALID_SREG; + } + } + ClobberSRegBody(reg_pool_->core_regs, reg_pool_->num_core_regs, s_reg); + ClobberSRegBody(reg_pool_->FPRegs, reg_pool_->num_fp_regs, s_reg); +} + +/* + * SSA names associated with the initial definitions of Dalvik + * registers are the same as the Dalvik register number (and + * thus take the same position in the promotion_map. However, + * the special Method* and compiler temp resisters use negative + * v_reg numbers to distinguish them and can have an arbitrary + * ssa name (above the last original Dalvik register). This function + * maps SSA names to positions in the promotion_map array. 
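+ * For example, with 16 Dalvik registers the promotion_map has slots 0-15
+ * for v0-v15, slot 16 for the Method* s_reg, and slots 17 and up for any
+ * compiler temps.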
+ */ +int Mir2Lir::SRegToPMap(int s_reg) +{ + DCHECK_LT(s_reg, mir_graph_->GetNumSSARegs()); + DCHECK_GE(s_reg, 0); + int v_reg = mir_graph_->SRegToVReg(s_reg); + if (v_reg >= 0) { + DCHECK_LT(v_reg, cu_->num_dalvik_registers); + return v_reg; + } else { + int pos = std::abs(v_reg) - std::abs(SSA_METHOD_BASEREG); + DCHECK_LE(pos, cu_->num_compiler_temps); + return cu_->num_dalvik_registers + pos; + } +} + +void Mir2Lir::RecordCorePromotion(int reg, int s_reg) +{ + int p_map_idx = SRegToPMap(s_reg); + int v_reg = mir_graph_->SRegToVReg(s_reg); + GetRegInfo(reg)->in_use = true; + core_spill_mask_ |= (1 << reg); + // Include reg for later sort + core_vmap_table_.push_back(reg << VREG_NUM_WIDTH | (v_reg & ((1 << VREG_NUM_WIDTH) - 1))); + num_core_spills_++; + promotion_map_[p_map_idx].core_location = kLocPhysReg; + promotion_map_[p_map_idx].core_reg = reg; +} + +/* Reserve a callee-save register. Return -1 if none available */ +int Mir2Lir::AllocPreservedCoreReg(int s_reg) +{ + int res = -1; + RegisterInfo* core_regs = reg_pool_->core_regs; + for (int i = 0; i < reg_pool_->num_core_regs; i++) { + if (!core_regs[i].is_temp && !core_regs[i].in_use) { + res = core_regs[i].reg; + RecordCorePromotion(res, s_reg); + break; + } + } + return res; +} + +void Mir2Lir::RecordFpPromotion(int reg, int s_reg) +{ + int p_map_idx = SRegToPMap(s_reg); + int v_reg = mir_graph_->SRegToVReg(s_reg); + GetRegInfo(reg)->in_use = true; + MarkPreservedSingle(v_reg, reg); + promotion_map_[p_map_idx].fp_location = kLocPhysReg; + promotion_map_[p_map_idx].FpReg = reg; +} + +/* + * Reserve a callee-save fp single register. Try to fullfill request for + * even/odd allocation, but go ahead and allocate anything if not + * available. If nothing's available, return -1. + */ +int Mir2Lir::AllocPreservedSingle(int s_reg, bool even) +{ + int res = -1; + RegisterInfo* FPRegs = reg_pool_->FPRegs; + for (int i = 0; i < reg_pool_->num_fp_regs; i++) { + if (!FPRegs[i].is_temp && !FPRegs[i].in_use && + ((FPRegs[i].reg & 0x1) == 0) == even) { + res = FPRegs[i].reg; + RecordFpPromotion(res, s_reg); + break; + } + } + return res; +} + +/* + * Somewhat messy code here. We want to allocate a pair of contiguous + * physical single-precision floating point registers starting with + * an even numbered reg. It is possible that the paired s_reg (s_reg+1) + * has already been allocated - try to fit if possible. Fail to + * allocate if we can't meet the requirements for the pair of + * s_reg<=sX[even] & (s_reg+1)<= sX+1. + */ +int Mir2Lir::AllocPreservedDouble(int s_reg) +{ + int res = -1; // Assume failure + int v_reg = mir_graph_->SRegToVReg(s_reg); + int p_map_idx = SRegToPMap(s_reg); + if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) { + // Upper reg is already allocated. Can we fit? + int high_reg = promotion_map_[p_map_idx+1].FpReg; + if ((high_reg & 1) == 0) { + // High reg is even - fail. + return res; + } + // Is the low reg of the pair free? + RegisterInfo* p = GetRegInfo(high_reg-1); + if (p->in_use || p->is_temp) { + // Already allocated or not preserved - fail. + return res; + } + // OK - good to go. 
+ res = p->reg; + p->in_use = true; + DCHECK_EQ((res & 1), 0); + MarkPreservedSingle(v_reg, res); + } else { + RegisterInfo* FPRegs = reg_pool_->FPRegs; + for (int i = 0; i < reg_pool_->num_fp_regs; i++) { + if (!FPRegs[i].is_temp && !FPRegs[i].in_use && + ((FPRegs[i].reg & 0x1) == 0x0) && + !FPRegs[i+1].is_temp && !FPRegs[i+1].in_use && + ((FPRegs[i+1].reg & 0x1) == 0x1) && + (FPRegs[i].reg + 1) == FPRegs[i+1].reg) { + res = FPRegs[i].reg; + FPRegs[i].in_use = true; + MarkPreservedSingle(v_reg, res); + FPRegs[i+1].in_use = true; + DCHECK_EQ(res + 1, FPRegs[i+1].reg); + MarkPreservedSingle(v_reg+1, res+1); + break; + } + } + } + if (res != -1) { + promotion_map_[p_map_idx].fp_location = kLocPhysReg; + promotion_map_[p_map_idx].FpReg = res; + promotion_map_[p_map_idx+1].fp_location = kLocPhysReg; + promotion_map_[p_map_idx+1].FpReg = res + 1; + } + return res; +} + + +/* + * Reserve a callee-save fp register. If this register can be used + * as the first of a double, attempt to allocate an even pair of fp + * single regs (but if can't still attempt to allocate a single, preferring + * first to allocate an odd register. + */ +int Mir2Lir::AllocPreservedFPReg(int s_reg, bool double_start) +{ + int res = -1; + if (double_start) { + res = AllocPreservedDouble(s_reg); + } + if (res == -1) { + res = AllocPreservedSingle(s_reg, false /* try odd # */); + } + if (res == -1) + res = AllocPreservedSingle(s_reg, true /* try even # */); + return res; +} + +int Mir2Lir::AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, + bool required) +{ + int i; + int next = *next_temp; + for (i=0; i< num_regs; i++) { + if (next >= num_regs) + next = 0; + if (p[next].is_temp && !p[next].in_use && !p[next].live) { + Clobber(p[next].reg); + p[next].in_use = true; + p[next].pair = false; + *next_temp = next + 1; + return p[next].reg; + } + next++; + } + next = *next_temp; + for (i=0; i< num_regs; i++) { + if (next >= num_regs) + next = 0; + if (p[next].is_temp && !p[next].in_use) { + Clobber(p[next].reg); + p[next].in_use = true; + p[next].pair = false; + *next_temp = next + 1; + return p[next].reg; + } + next++; + } + if (required) { + CodegenDump(); + DumpRegPool(reg_pool_->core_regs, + reg_pool_->num_core_regs); + LOG(FATAL) << "No free temp registers"; + } + return -1; // No register available +} + +//REDO: too many assumptions. +int Mir2Lir::AllocTempDouble() +{ + RegisterInfo* p = reg_pool_->FPRegs; + int num_regs = reg_pool_->num_fp_regs; + /* Start looking at an even reg */ + int next = reg_pool_->next_fp_reg & ~0x1; + + // First try to avoid allocating live registers + for (int i=0; i < num_regs; i+=2) { + if (next >= num_regs) + next = 0; + if ((p[next].is_temp && !p[next].in_use && !p[next].live) && + (p[next+1].is_temp && !p[next+1].in_use && !p[next+1].live)) { + Clobber(p[next].reg); + Clobber(p[next+1].reg); + p[next].in_use = true; + p[next+1].in_use = true; + DCHECK_EQ((p[next].reg+1), p[next+1].reg); + DCHECK_EQ((p[next].reg & 0x1), 0); + reg_pool_->next_fp_reg = next + 2; + if (reg_pool_->next_fp_reg >= num_regs) { + reg_pool_->next_fp_reg = 0; + } + return p[next].reg; + } + next += 2; + } + next = reg_pool_->next_fp_reg & ~0x1; + + // No choice - find a pair and kill it. 
+ for (int i=0; i < num_regs; i+=2) { + if (next >= num_regs) + next = 0; + if (p[next].is_temp && !p[next].in_use && p[next+1].is_temp && + !p[next+1].in_use) { + Clobber(p[next].reg); + Clobber(p[next+1].reg); + p[next].in_use = true; + p[next+1].in_use = true; + DCHECK_EQ((p[next].reg+1), p[next+1].reg); + DCHECK_EQ((p[next].reg & 0x1), 0); + reg_pool_->next_fp_reg = next + 2; + if (reg_pool_->next_fp_reg >= num_regs) { + reg_pool_->next_fp_reg = 0; + } + return p[next].reg; + } + next += 2; + } + LOG(FATAL) << "No free temp registers (pair)"; + return -1; +} + +/* Return a temp if one is available, -1 otherwise */ +int Mir2Lir::AllocFreeTemp() +{ + return AllocTempBody(reg_pool_->core_regs, + reg_pool_->num_core_regs, + ®_pool_->next_core_reg, true); +} + +int Mir2Lir::AllocTemp() +{ + return AllocTempBody(reg_pool_->core_regs, + reg_pool_->num_core_regs, + ®_pool_->next_core_reg, true); +} + +int Mir2Lir::AllocTempFloat() +{ + return AllocTempBody(reg_pool_->FPRegs, + reg_pool_->num_fp_regs, + ®_pool_->next_fp_reg, true); +} + +Mir2Lir::RegisterInfo* Mir2Lir::AllocLiveBody(RegisterInfo* p, int num_regs, int s_reg) +{ + int i; + if (s_reg == -1) + return NULL; + for (i=0; i < num_regs; i++) { + if (p[i].live && (p[i].s_reg == s_reg)) { + if (p[i].is_temp) + p[i].in_use = true; + return &p[i]; + } + } + return NULL; +} + +Mir2Lir::RegisterInfo* Mir2Lir::AllocLive(int s_reg, int reg_class) +{ + RegisterInfo* res = NULL; + switch (reg_class) { + case kAnyReg: + res = AllocLiveBody(reg_pool_->FPRegs, + reg_pool_->num_fp_regs, s_reg); + if (res) + break; + /* Intentional fallthrough */ + case kCoreReg: + res = AllocLiveBody(reg_pool_->core_regs, + reg_pool_->num_core_regs, s_reg); + break; + case kFPReg: + res = AllocLiveBody(reg_pool_->FPRegs, + reg_pool_->num_fp_regs, s_reg); + break; + default: + LOG(FATAL) << "Invalid register type"; + } + return res; +} + +void Mir2Lir::FreeTemp(int reg) +{ + RegisterInfo* p = reg_pool_->core_regs; + int num_regs = reg_pool_->num_core_regs; + int i; + for (i=0; i< num_regs; i++) { + if (p[i].reg == reg) { + if (p[i].is_temp) { + p[i].in_use = false; + } + p[i].pair = false; + return; + } + } + p = reg_pool_->FPRegs; + num_regs = reg_pool_->num_fp_regs; + for (i=0; i< num_regs; i++) { + if (p[i].reg == reg) { + if (p[i].is_temp) { + p[i].in_use = false; + } + p[i].pair = false; + return; + } + } + LOG(FATAL) << "Tried to free a non-existant temp: r" << reg; +} + +Mir2Lir::RegisterInfo* Mir2Lir::IsLive(int reg) +{ + RegisterInfo* p = reg_pool_->core_regs; + int num_regs = reg_pool_->num_core_regs; + int i; + for (i=0; i< num_regs; i++) { + if (p[i].reg == reg) { + return p[i].live ? &p[i] : NULL; + } + } + p = reg_pool_->FPRegs; + num_regs = reg_pool_->num_fp_regs; + for (i=0; i< num_regs; i++) { + if (p[i].reg == reg) { + return p[i].live ? &p[i] : NULL; + } + } + return NULL; +} + +Mir2Lir::RegisterInfo* Mir2Lir::IsTemp(int reg) +{ + RegisterInfo* p = GetRegInfo(reg); + return (p->is_temp) ? p : NULL; +} + +Mir2Lir::RegisterInfo* Mir2Lir::IsPromoted(int reg) +{ + RegisterInfo* p = GetRegInfo(reg); + return (p->is_temp) ? NULL : p; +} + +bool Mir2Lir::IsDirty(int reg) +{ + RegisterInfo* p = GetRegInfo(reg); + return p->dirty; +} + +/* + * Similar to AllocTemp(), but forces the allocation of a specific + * register. No check is made to see if the register was previously + * allocated. Use with caution. 
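+ * GetReturn()/GetReturnWide() below use this to pin the fixed return
+ * registers, presumably while a call or return sequence is being built.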
+ */ +void Mir2Lir::LockTemp(int reg) +{ + RegisterInfo* p = reg_pool_->core_regs; + int num_regs = reg_pool_->num_core_regs; + int i; + for (i=0; i< num_regs; i++) { + if (p[i].reg == reg) { + DCHECK(p[i].is_temp); + p[i].in_use = true; + p[i].live = false; + return; + } + } + p = reg_pool_->FPRegs; + num_regs = reg_pool_->num_fp_regs; + for (i=0; i< num_regs; i++) { + if (p[i].reg == reg) { + DCHECK(p[i].is_temp); + p[i].in_use = true; + p[i].live = false; + return; + } + } + LOG(FATAL) << "Tried to lock a non-existant temp: r" << reg; +} + +void Mir2Lir::ResetDef(int reg) +{ + ResetDefBody(GetRegInfo(reg)); +} + +void Mir2Lir::NullifyRange(LIR *start, LIR *finish, int s_reg1, int s_reg2) +{ + if (start && finish) { + LIR *p; + DCHECK_EQ(s_reg1, s_reg2); + for (p = start; ;p = p->next) { + NopLIR(p); + if (p == finish) + break; + } + } +} + +/* + * Mark the beginning and end LIR of a def sequence. Note that + * on entry start points to the LIR prior to the beginning of the + * sequence. + */ +void Mir2Lir::MarkDef(RegLocation rl, LIR *start, LIR *finish) +{ + DCHECK(!rl.wide); + DCHECK(start && start->next); + DCHECK(finish); + RegisterInfo* p = GetRegInfo(rl.low_reg); + p->def_start = start->next; + p->def_end = finish; +} + +/* + * Mark the beginning and end LIR of a def sequence. Note that + * on entry start points to the LIR prior to the beginning of the + * sequence. + */ +void Mir2Lir::MarkDefWide(RegLocation rl, LIR *start, LIR *finish) +{ + DCHECK(rl.wide); + DCHECK(start && start->next); + DCHECK(finish); + RegisterInfo* p = GetRegInfo(rl.low_reg); + ResetDef(rl.high_reg); // Only track low of pair + p->def_start = start->next; + p->def_end = finish; +} + +RegLocation Mir2Lir::WideToNarrow(RegLocation rl) +{ + DCHECK(rl.wide); + if (rl.location == kLocPhysReg) { + RegisterInfo* info_lo = GetRegInfo(rl.low_reg); + RegisterInfo* info_hi = GetRegInfo(rl.high_reg); + if (info_lo->is_temp) { + info_lo->pair = false; + info_lo->def_start = NULL; + info_lo->def_end = NULL; + } + if (info_hi->is_temp) { + info_hi->pair = false; + info_hi->def_start = NULL; + info_hi->def_end = NULL; + } + } + rl.wide = false; + return rl; +} + +void Mir2Lir::ResetDefLoc(RegLocation rl) +{ + DCHECK(!rl.wide); + RegisterInfo* p = IsTemp(rl.low_reg); + if (p && !(cu_->disable_opt & (1 << kSuppressLoads))) { + DCHECK(!p->pair); + NullifyRange(p->def_start, p->def_end, p->s_reg, rl.s_reg_low); + } + ResetDef(rl.low_reg); +} + +void Mir2Lir::ResetDefLocWide(RegLocation rl) +{ + DCHECK(rl.wide); + RegisterInfo* p_low = IsTemp(rl.low_reg); + RegisterInfo* p_high = IsTemp(rl.high_reg); + if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) { + DCHECK(p_low->pair); + NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low); + } + if (p_high && !(cu_->disable_opt & (1 << kSuppressLoads))) { + DCHECK(p_high->pair); + } + ResetDef(rl.low_reg); + ResetDef(rl.high_reg); +} + +void Mir2Lir::ResetDefTracking() +{ + int i; + for (i=0; i< reg_pool_->num_core_regs; i++) { + ResetDefBody(®_pool_->core_regs[i]); + } + for (i=0; i< reg_pool_->num_fp_regs; i++) { + ResetDefBody(®_pool_->FPRegs[i]); + } +} + +void Mir2Lir::ClobberAllRegs() +{ + int i; + for (i=0; i< reg_pool_->num_core_regs; i++) { + ClobberBody(®_pool_->core_regs[i]); + } + for (i=0; i< reg_pool_->num_fp_regs; i++) { + ClobberBody(®_pool_->FPRegs[i]); + } +} + +// Make sure nothing is live and dirty +void Mir2Lir::FlushAllRegsBody(RegisterInfo* info, int num_regs) +{ + int i; + for (i=0; i < num_regs; i++) { + if (info[i].live && 
info[i].dirty) { + if (info[i].pair) { + FlushRegWide(info[i].reg, info[i].partner); + } else { + FlushReg(info[i].reg); + } + } + } +} + +void Mir2Lir::FlushAllRegs() +{ + FlushAllRegsBody(reg_pool_->core_regs, + reg_pool_->num_core_regs); + FlushAllRegsBody(reg_pool_->FPRegs, + reg_pool_->num_fp_regs); + ClobberAllRegs(); +} + + +//TUNING: rewrite all of this reg stuff. Probably use an attribute table +bool Mir2Lir::RegClassMatches(int reg_class, int reg) +{ + if (reg_class == kAnyReg) { + return true; + } else if (reg_class == kCoreReg) { + return !IsFpReg(reg); + } else { + return IsFpReg(reg); + } +} + +void Mir2Lir::MarkLive(int reg, int s_reg) +{ + RegisterInfo* info = GetRegInfo(reg); + if ((info->reg == reg) && (info->s_reg == s_reg) && info->live) { + return; /* already live */ + } else if (s_reg != INVALID_SREG) { + ClobberSReg(s_reg); + if (info->is_temp) { + info->live = true; + } + } else { + /* Can't be live if no associated s_reg */ + DCHECK(info->is_temp); + info->live = false; + } + info->s_reg = s_reg; +} + +void Mir2Lir::MarkTemp(int reg) +{ + RegisterInfo* info = GetRegInfo(reg); + info->is_temp = true; +} + +void Mir2Lir::UnmarkTemp(int reg) +{ + RegisterInfo* info = GetRegInfo(reg); + info->is_temp = false; +} + +void Mir2Lir::MarkPair(int low_reg, int high_reg) +{ + RegisterInfo* info_lo = GetRegInfo(low_reg); + RegisterInfo* info_hi = GetRegInfo(high_reg); + info_lo->pair = info_hi->pair = true; + info_lo->partner = high_reg; + info_hi->partner = low_reg; +} + +void Mir2Lir::MarkClean(RegLocation loc) +{ + RegisterInfo* info = GetRegInfo(loc.low_reg); + info->dirty = false; + if (loc.wide) { + info = GetRegInfo(loc.high_reg); + info->dirty = false; + } +} + +void Mir2Lir::MarkDirty(RegLocation loc) +{ + if (loc.home) { + // If already home, can't be dirty + return; + } + RegisterInfo* info = GetRegInfo(loc.low_reg); + info->dirty = true; + if (loc.wide) { + info = GetRegInfo(loc.high_reg); + info->dirty = true; + } +} + +void Mir2Lir::MarkInUse(int reg) +{ + RegisterInfo* info = GetRegInfo(reg); + info->in_use = true; +} + +void Mir2Lir::CopyRegInfo(int new_reg, int old_reg) +{ + RegisterInfo* new_info = GetRegInfo(new_reg); + RegisterInfo* old_info = GetRegInfo(old_reg); + // Target temp status must not change + bool is_temp = new_info->is_temp; + *new_info = *old_info; + // Restore target's temp status + new_info->is_temp = is_temp; + new_info->reg = new_reg; +} + +bool Mir2Lir::CheckCorePoolSanity() +{ + for (static int i = 0; i < reg_pool_->num_core_regs; i++) { + if (reg_pool_->core_regs[i].pair) { + static int my_reg = reg_pool_->core_regs[i].reg; + static int my_sreg = reg_pool_->core_regs[i].s_reg; + static int partner_reg = reg_pool_->core_regs[i].partner; + static RegisterInfo* partner = GetRegInfo(partner_reg); + DCHECK(partner != NULL); + DCHECK(partner->pair); + DCHECK_EQ(my_reg, partner->partner); + static int partner_sreg = partner->s_reg; + if (my_sreg == INVALID_SREG) { + DCHECK_EQ(partner_sreg, INVALID_SREG); + } else { + int diff = my_sreg - partner_sreg; + DCHECK((diff == -1) || (diff == 1)); + } + } + if (!reg_pool_->core_regs[i].live) { + DCHECK(reg_pool_->core_regs[i].def_start == NULL); + DCHECK(reg_pool_->core_regs[i].def_end == NULL); + } + } + return true; +} + +/* + * Return an updated location record with current in-register status. + * If the value lives in live temps, reflect that fact. No code + * is generated. If the live value is part of an older pair, + * clobber both low and high. 
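+ * For example, if the value was last seen as half of a wide pair, both the
+ * register and its partner are clobbered and the temp freed rather than
+ * trusting a possibly stale pairing.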
+ * TUNING: clobbering both is a bit heavy-handed, but the alternative + * is a bit complex when dealing with FP regs. Examine code to see + * if it's worthwhile trying to be more clever here. + */ + +RegLocation Mir2Lir::UpdateLoc(RegLocation loc) +{ + DCHECK(!loc.wide); + DCHECK(CheckCorePoolSanity()); + if (loc.location != kLocPhysReg) { + DCHECK((loc.location == kLocDalvikFrame) || + (loc.location == kLocCompilerTemp)); + RegisterInfo* info_lo = AllocLive(loc.s_reg_low, kAnyReg); + if (info_lo) { + if (info_lo->pair) { + Clobber(info_lo->reg); + Clobber(info_lo->partner); + FreeTemp(info_lo->reg); + } else { + loc.low_reg = info_lo->reg; + loc.location = kLocPhysReg; + } + } + } + + return loc; +} + +/* see comments for update_loc */ +RegLocation Mir2Lir::UpdateLocWide(RegLocation loc) +{ + DCHECK(loc.wide); + DCHECK(CheckCorePoolSanity()); + if (loc.location != kLocPhysReg) { + DCHECK((loc.location == kLocDalvikFrame) || + (loc.location == kLocCompilerTemp)); + // Are the dalvik regs already live in physical registers? + RegisterInfo* info_lo = AllocLive(loc.s_reg_low, kAnyReg); + RegisterInfo* info_hi = AllocLive(GetSRegHi(loc.s_reg_low), kAnyReg); + bool match = true; + match = match && (info_lo != NULL); + match = match && (info_hi != NULL); + // Are they both core or both FP? + match = match && (IsFpReg(info_lo->reg) == IsFpReg(info_hi->reg)); + // If a pair of floating point singles, are they properly aligned? + if (match && IsFpReg(info_lo->reg)) { + match &= ((info_lo->reg & 0x1) == 0); + match &= ((info_hi->reg - info_lo->reg) == 1); + } + // If previously used as a pair, it is the same pair? + if (match && (info_lo->pair || info_hi->pair)) { + match = (info_lo->pair == info_hi->pair); + match &= ((info_lo->reg == info_hi->partner) && + (info_hi->reg == info_lo->partner)); + } + if (match) { + // Can reuse - update the register usage info + loc.low_reg = info_lo->reg; + loc.high_reg = info_hi->reg; + loc.location = kLocPhysReg; + MarkPair(loc.low_reg, loc.high_reg); + DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0)); + return loc; + } + // Can't easily reuse - clobber and free any overlaps + if (info_lo) { + Clobber(info_lo->reg); + FreeTemp(info_lo->reg); + if (info_lo->pair) + Clobber(info_lo->partner); + } + if (info_hi) { + Clobber(info_hi->reg); + FreeTemp(info_hi->reg); + if (info_hi->pair) + Clobber(info_hi->partner); + } + } + return loc; +} + + +/* For use in cases we don't know (or care) width */ +RegLocation Mir2Lir::UpdateRawLoc(RegLocation loc) +{ + if (loc.wide) + return UpdateLocWide(loc); + else + return UpdateLoc(loc); +} + +RegLocation Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) +{ + DCHECK(loc.wide); + int new_regs; + int low_reg; + int high_reg; + + loc = UpdateLocWide(loc); + + /* If already in registers, we can assume proper form. Right reg class? */ + if (loc.location == kLocPhysReg) { + DCHECK_EQ(IsFpReg(loc.low_reg), IsFpReg(loc.high_reg)); + DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0)); + if (!RegClassMatches(reg_class, loc.low_reg)) { + /* Wrong register class. 
Reallocate and copy */ + new_regs = AllocTypedTempPair(loc.fp, reg_class); + low_reg = new_regs & 0xff; + high_reg = (new_regs >> 8) & 0xff; + OpRegCopyWide(low_reg, high_reg, loc.low_reg, loc.high_reg); + CopyRegInfo(low_reg, loc.low_reg); + CopyRegInfo(high_reg, loc.high_reg); + Clobber(loc.low_reg); + Clobber(loc.high_reg); + loc.low_reg = low_reg; + loc.high_reg = high_reg; + MarkPair(loc.low_reg, loc.high_reg); + DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0)); + } + return loc; + } + + DCHECK_NE(loc.s_reg_low, INVALID_SREG); + DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG); + + new_regs = AllocTypedTempPair(loc.fp, reg_class); + loc.low_reg = new_regs & 0xff; + loc.high_reg = (new_regs >> 8) & 0xff; + + MarkPair(loc.low_reg, loc.high_reg); + if (update) { + loc.location = kLocPhysReg; + MarkLive(loc.low_reg, loc.s_reg_low); + MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low)); + } + DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0)); + return loc; +} + +RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) +{ + int new_reg; + + if (loc.wide) + return EvalLocWide(loc, reg_class, update); + + loc = UpdateLoc(loc); + + if (loc.location == kLocPhysReg) { + if (!RegClassMatches(reg_class, loc.low_reg)) { + /* Wrong register class. Realloc, copy and transfer ownership */ + new_reg = AllocTypedTemp(loc.fp, reg_class); + OpRegCopy(new_reg, loc.low_reg); + CopyRegInfo(new_reg, loc.low_reg); + Clobber(loc.low_reg); + loc.low_reg = new_reg; + } + return loc; + } + + DCHECK_NE(loc.s_reg_low, INVALID_SREG); + + new_reg = AllocTypedTemp(loc.fp, reg_class); + loc.low_reg = new_reg; + + if (update) { + loc.location = kLocPhysReg; + MarkLive(loc.low_reg, loc.s_reg_low); + } + return loc; +} + +/* USE SSA names to count references of base Dalvik v_regs. */ +void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts) { + for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) { + RegLocation loc = mir_graph_->reg_location_[i]; + RefCounts* counts = loc.fp ? fp_counts : core_counts; + int p_map_idx = SRegToPMap(loc.s_reg_low); + //Don't count easily regenerated immediates + if (loc.fp || !IsInexpensiveConstant(loc)) { + counts[p_map_idx].count += mir_graph_->GetUseCount(i); + } + if (loc.wide && loc.fp && !loc.high_word) { + counts[p_map_idx].double_start = true; + } + } +} + +/* qsort callback function, sort descending */ +static int SortCounts(const void *val1, const void *val2) +{ + const Mir2Lir::RefCounts* op1 = reinterpret_cast<const Mir2Lir::RefCounts*>(val1); + const Mir2Lir::RefCounts* op2 = reinterpret_cast<const Mir2Lir::RefCounts*>(val2); + return (op1->count == op2->count) ? 0 : (op1->count < op2->count ? 1 : -1); +} + +void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) +{ + LOG(INFO) << msg; + for (int i = 0; i < size; i++) { + LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count; + } +} + +/* + * Note: some portions of this code required even if the kPromoteRegs + * optimization is disabled. + */ +void Mir2Lir::DoPromotion() +{ + int reg_bias = cu_->num_compiler_temps + 1; + int dalvik_regs = cu_->num_dalvik_registers; + int num_regs = dalvik_regs + reg_bias; + const int promotion_threshold = 1; + + // Allow target code to add any special registers + AdjustSpillMask(); + + /* + * Simple register promotion. Just do a static count of the uses + * of Dalvik registers. Note that we examine the SSA names, but + * count based on original Dalvik register name. 
Count refs + * separately based on type in order to give allocation + * preference to fp doubles - which must be allocated sequential + * physical single fp registers started with an even-numbered + * reg. + * TUNING: replace with linear scan once we have the ability + * to describe register live ranges for GC. + */ + RefCounts *core_regs = + static_cast<RefCounts*>(arena_->NewMem(sizeof(RefCounts) * num_regs, true, + ArenaAllocator::kAllocRegAlloc)); + RefCounts *FpRegs = + static_cast<RefCounts *>(arena_->NewMem(sizeof(RefCounts) * num_regs, true, + ArenaAllocator::kAllocRegAlloc)); + // Set ssa names for original Dalvik registers + for (int i = 0; i < dalvik_regs; i++) { + core_regs[i].s_reg = FpRegs[i].s_reg = i; + } + // Set ssa name for Method* + core_regs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg(); + FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg(); // For consistecy + // Set ssa names for compiler_temps + for (int i = 1; i <= cu_->num_compiler_temps; i++) { + CompilerTemp* ct = mir_graph_->compiler_temps_.Get(i); + core_regs[dalvik_regs + i].s_reg = ct->s_reg; + FpRegs[dalvik_regs + i].s_reg = ct->s_reg; + } + + // Sum use counts of SSA regs by original Dalvik vreg. + CountRefs(core_regs, FpRegs); + + /* + * Ideally, we'd allocate doubles starting with an even-numbered + * register. Bias the counts to try to allocate any vreg that's + * used as the start of a pair first. + */ + for (int i = 0; i < num_regs; i++) { + if (FpRegs[i].double_start) { + FpRegs[i].count *= 2; + } + } + + // Sort the count arrays + qsort(core_regs, num_regs, sizeof(RefCounts), SortCounts); + qsort(FpRegs, num_regs, sizeof(RefCounts), SortCounts); + + if (cu_->verbose) { + DumpCounts(core_regs, num_regs, "Core regs after sort"); + DumpCounts(FpRegs, num_regs, "Fp regs after sort"); + } + + if (!(cu_->disable_opt & (1 << kPromoteRegs))) { + // Promote FpRegs + for (int i = 0; (i < num_regs) && + (FpRegs[i].count >= promotion_threshold ); i++) { + int p_map_idx = SRegToPMap(FpRegs[i].s_reg); + if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) { + int reg = AllocPreservedFPReg(FpRegs[i].s_reg, + FpRegs[i].double_start); + if (reg < 0) { + break; // No more left + } + } + } + + // Promote core regs + for (int i = 0; (i < num_regs) && + (core_regs[i].count >= promotion_threshold); i++) { + int p_map_idx = SRegToPMap(core_regs[i].s_reg); + if (promotion_map_[p_map_idx].core_location != + kLocPhysReg) { + int reg = AllocPreservedCoreReg(core_regs[i].s_reg); + if (reg < 0) { + break; // No more left + } + } + } + } + + // Now, update SSA names to new home locations + for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) { + RegLocation *curr = &mir_graph_->reg_location_[i]; + int p_map_idx = SRegToPMap(curr->s_reg_low); + if (!curr->wide) { + if (curr->fp) { + if (promotion_map_[p_map_idx].fp_location == kLocPhysReg) { + curr->location = kLocPhysReg; + curr->low_reg = promotion_map_[p_map_idx].FpReg; + curr->home = true; + } + } else { + if (promotion_map_[p_map_idx].core_location == kLocPhysReg) { + curr->location = kLocPhysReg; + curr->low_reg = promotion_map_[p_map_idx].core_reg; + curr->home = true; + } + } + curr->high_reg = INVALID_REG; + } else { + if (curr->high_word) { + continue; + } + if (curr->fp) { + if ((promotion_map_[p_map_idx].fp_location == kLocPhysReg) && + (promotion_map_[p_map_idx+1].fp_location == + kLocPhysReg)) { + int low_reg = promotion_map_[p_map_idx].FpReg; + int high_reg = promotion_map_[p_map_idx+1].FpReg; + // Doubles require pair of singles starting at even reg + 
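// Only mark the pair as home if both halves landed in a contiguous,
+ // even-aligned (even, even+1) pair; otherwise the wide value keeps its
+ // frame location.
+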
if (((low_reg & 0x1) == 0) && ((low_reg + 1) == high_reg)) { + curr->location = kLocPhysReg; + curr->low_reg = low_reg; + curr->high_reg = high_reg; + curr->home = true; + } + } + } else { + if ((promotion_map_[p_map_idx].core_location == kLocPhysReg) + && (promotion_map_[p_map_idx+1].core_location == + kLocPhysReg)) { + curr->location = kLocPhysReg; + curr->low_reg = promotion_map_[p_map_idx].core_reg; + curr->high_reg = promotion_map_[p_map_idx+1].core_reg; + curr->home = true; + } + } + } + } + if (cu_->verbose) { + DumpPromotionMap(); + } +} + +/* Returns sp-relative offset in bytes for a VReg */ +int Mir2Lir::VRegOffset(int v_reg) +{ + return StackVisitor::GetVRegOffset(cu_->code_item, core_spill_mask_, + fp_spill_mask_, frame_size_, v_reg); +} + +/* Returns sp-relative offset in bytes for a SReg */ +int Mir2Lir::SRegOffset(int s_reg) +{ + return VRegOffset(mir_graph_->SRegToVReg(s_reg)); +} + +/* Mark register usage state and return long retloc */ +RegLocation Mir2Lir::GetReturnWide(bool is_double) +{ + RegLocation gpr_res = LocCReturnWide(); + RegLocation fpr_res = LocCReturnDouble(); + RegLocation res = is_double ? fpr_res : gpr_res; + Clobber(res.low_reg); + Clobber(res.high_reg); + LockTemp(res.low_reg); + LockTemp(res.high_reg); + MarkPair(res.low_reg, res.high_reg); + return res; +} + +RegLocation Mir2Lir::GetReturn(bool is_float) +{ + RegLocation gpr_res = LocCReturn(); + RegLocation fpr_res = LocCReturnFloat(); + RegLocation res = is_float ? fpr_res : gpr_res; + Clobber(res.low_reg); + if (cu_->instruction_set == kMips) { + MarkInUse(res.low_reg); + } else { + LockTemp(res.low_reg); + } + return res; +} + +void Mir2Lir::SimpleRegAlloc() +{ + DoPromotion(); + + if (cu_->verbose && !(cu_->disable_opt & (1 << kPromoteRegs))) { + LOG(INFO) << "After Promotion"; + mir_graph_->DumpRegLocTable(mir_graph_->reg_location_, mir_graph_->GetNumSSARegs()); + } + + /* Set the frame size */ + frame_size_ = ComputeFrameSize(); +} + +/* + * Get the "real" sreg number associated with an s_reg slot. In general, + * s_reg values passed through codegen are the SSA names created by + * dataflow analysis and refer to slot numbers in the mir_graph_->reg_location + * array. However, renaming is accomplished by simply replacing RegLocation + * entries in the reglocation[] array. Therefore, when location + * records for operands are first created, we need to ask the locRecord + * identified by the dataflow pass what it's new name is. + */ +int Mir2Lir::GetSRegHi(int lowSreg) { + return (lowSreg == INVALID_SREG) ? INVALID_SREG : lowSreg + 1; +} + +bool Mir2Lir::oat_live_out(int s_reg) { + //For now. + return true; +} + +int Mir2Lir::oatSSASrc(MIR* mir, int num) { + DCHECK_GT(mir->ssa_rep->num_uses, num); + return mir->ssa_rep->uses[num]; +} + +} // namespace art diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc new file mode 100644 index 0000000000..4aeda41291 --- /dev/null +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -0,0 +1,1388 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "codegen_x86.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "x86_lir.h" + +namespace art { + +#define MAX_ASSEMBLER_RETRIES 50 + +const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = { + { kX8632BitData, kData, IS_UNARY_OP, { 0, 0, 0x00, 0, 0, 0, 0, 4 }, "data", "0x!0d" }, + { kX86Bkpt, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0 }, "int 3", "" }, + { kX86Nop, kNop, IS_UNARY_OP, { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop", "" }, + +#define ENCODING_MAP(opname, mem_use, reg_def, uses_ccodes, \ + rm8_r8, rm32_r32, \ + r8_rm8, r32_rm32, \ + ax8_i8, ax32_i32, \ + rm8_i8, rm8_i8_modrm, \ + rm32_i32, rm32_i32_modrm, \ + rm32_i8, rm32_i8_modrm) \ +{ kX86 ## opname ## 8MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_r8, 0, 0, 0, 0, 0 }, #opname "8MR", "[!0r+!1d],!2r" }, \ +{ kX86 ## opname ## 8AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_r8, 0, 0, 0, 0, 0 }, #opname "8AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 8TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_r8, 0, 0, 0, 0, 0 }, #opname "8TR", "fs:[!0d],!1r" }, \ +{ kX86 ## opname ## 8RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0 }, #opname "8RR", "!0r,!1r" }, \ +{ kX86 ## opname ## 8RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0 }, #opname "8RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## 8RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0, 0, r8_rm8, 0, 0, 0, 0, 0 }, #opname "8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ +{ kX86 ## opname ## 8RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r8_rm8, 0, 0, 0, 0, 0 }, #opname "8RT", "!0r,fs:[!1d]" }, \ +{ kX86 ## opname ## 8RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, ax8_i8, 1 }, #opname "8RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 8MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1 }, #opname "8MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 8AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 8TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm8_i8, 0, 0, rm8_i8_modrm, 0, 1 }, #opname "8TI", "fs:[!0d],!1d" }, \ + \ +{ kX86 ## opname ## 16MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "16MR", "[!0r+!1d],!2r" }, \ +{ kX86 ## opname ## 16AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "16AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 16TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_r32, 0, 0, 0, 0, 0 }, #opname "16TR", "fs:[!0d],!1r" }, \ +{ kX86 ## opname ## 16RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "16RR", "!0r,!1r" }, \ +{ kX86 ## opname ## 16RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | 
reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "16RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## 16RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0x66, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ +{ kX86 ## opname ## 16RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, r32_rm32, 0, 0, 0, 0, 0 }, #opname "16RT", "!0r,fs:[!1d]" }, \ +{ kX86 ## opname ## 16RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 2 }, #opname "16RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 16MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 2 }, #opname "16MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 16AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 2 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 16TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i32, 0, 0, rm32_i32_modrm, 0, 2 }, #opname "16TI", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 16RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "16RI8", "!0r,!1d" }, \ +{ kX86 ## opname ## 16MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "16MI8", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 16AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0x66, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "16AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "16TI8", "fs:[!0d],!1d" }, \ + \ +{ kX86 ## opname ## 32MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32MR", "[!0r+!1d],!2r" }, \ +{ kX86 ## opname ## 32AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 32TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32TR", "fs:[!0d],!1r" }, \ +{ kX86 ## opname ## 32RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RR", "!0r,!1r" }, \ +{ kX86 ## opname ## 32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## 32RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ +{ kX86 ## opname ## 32RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RT", "!0r,fs:[!1d]" }, \ +{ kX86 ## opname ## 32RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 32MI, kMemImm, mem_use | 
IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 32AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 32TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32TI", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 32RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32RI8", "!0r,!1d" }, \ +{ kX86 ## opname ## 32MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 32AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32TI8", "fs:[!0d],!1d" } + +ENCODING_MAP(Add, IS_LOAD | IS_STORE, REG_DEF0, 0, + 0x00 /* RegMem8/Reg8 */, 0x01 /* RegMem32/Reg32 */, + 0x02 /* Reg8/RegMem8 */, 0x03 /* Reg32/RegMem32 */, + 0x04 /* Rax8/imm8 opcode */, 0x05 /* Rax32/imm32 */, + 0x80, 0x0 /* RegMem8/imm8 */, + 0x81, 0x0 /* RegMem32/imm32 */, 0x83, 0x0 /* RegMem32/imm8 */), +ENCODING_MAP(Or, IS_LOAD | IS_STORE, REG_DEF0, 0, + 0x08 /* RegMem8/Reg8 */, 0x09 /* RegMem32/Reg32 */, + 0x0A /* Reg8/RegMem8 */, 0x0B /* Reg32/RegMem32 */, + 0x0C /* Rax8/imm8 opcode */, 0x0D /* Rax32/imm32 */, + 0x80, 0x1 /* RegMem8/imm8 */, + 0x81, 0x1 /* RegMem32/imm32 */, 0x83, 0x1 /* RegMem32/imm8 */), +ENCODING_MAP(Adc, IS_LOAD | IS_STORE, REG_DEF0, USES_CCODES, + 0x10 /* RegMem8/Reg8 */, 0x11 /* RegMem32/Reg32 */, + 0x12 /* Reg8/RegMem8 */, 0x13 /* Reg32/RegMem32 */, + 0x14 /* Rax8/imm8 opcode */, 0x15 /* Rax32/imm32 */, + 0x80, 0x2 /* RegMem8/imm8 */, + 0x81, 0x2 /* RegMem32/imm32 */, 0x83, 0x2 /* RegMem32/imm8 */), +ENCODING_MAP(Sbb, IS_LOAD | IS_STORE, REG_DEF0, USES_CCODES, + 0x18 /* RegMem8/Reg8 */, 0x19 /* RegMem32/Reg32 */, + 0x1A /* Reg8/RegMem8 */, 0x1B /* Reg32/RegMem32 */, + 0x1C /* Rax8/imm8 opcode */, 0x1D /* Rax32/imm32 */, + 0x80, 0x3 /* RegMem8/imm8 */, + 0x81, 0x3 /* RegMem32/imm32 */, 0x83, 0x3 /* RegMem32/imm8 */), +ENCODING_MAP(And, IS_LOAD | IS_STORE, REG_DEF0, 0, + 0x20 /* RegMem8/Reg8 */, 0x21 /* RegMem32/Reg32 */, + 0x22 /* Reg8/RegMem8 */, 0x23 /* Reg32/RegMem32 */, + 0x24 /* Rax8/imm8 opcode */, 0x25 /* Rax32/imm32 */, + 0x80, 0x4 /* RegMem8/imm8 */, + 0x81, 0x4 /* RegMem32/imm32 */, 0x83, 0x4 /* RegMem32/imm8 */), +ENCODING_MAP(Sub, IS_LOAD | IS_STORE, REG_DEF0, 0, + 0x28 /* RegMem8/Reg8 */, 0x29 /* RegMem32/Reg32 */, + 0x2A /* Reg8/RegMem8 */, 0x2B /* Reg32/RegMem32 */, + 0x2C /* Rax8/imm8 opcode */, 0x2D /* Rax32/imm32 */, + 0x80, 0x5 /* RegMem8/imm8 */, + 0x81, 0x5 /* RegMem32/imm32 */, 0x83, 0x5 /* RegMem32/imm8 */), +ENCODING_MAP(Xor, IS_LOAD | IS_STORE, REG_DEF0, 0, + 0x30 /* RegMem8/Reg8 */, 0x31 /* RegMem32/Reg32 */, + 0x32 /* Reg8/RegMem8 */, 0x33 /* Reg32/RegMem32 */, + 0x34 /* Rax8/imm8 opcode */, 0x35 /* Rax32/imm32 */, + 0x80, 0x6 /* RegMem8/imm8 */, + 0x81, 0x6 /* RegMem32/imm32 */, 0x83, 0x6 /* RegMem32/imm8 */), +ENCODING_MAP(Cmp, IS_LOAD, 0, 0, + 0x38 /* RegMem8/Reg8 */, 0x39 /* 
RegMem32/Reg32 */, + 0x3A /* Reg8/RegMem8 */, 0x3B /* Reg32/RegMem32 */, + 0x3C /* Rax8/imm8 opcode */, 0x3D /* Rax32/imm32 */, + 0x80, 0x7 /* RegMem8/imm8 */, + 0x81, 0x7 /* RegMem32/imm32 */, 0x83, 0x7 /* RegMem32/imm8 */), +#undef ENCODING_MAP + + { kX86Imul16RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RRI", "!0r,!1r,!2d" }, + { kX86Imul16RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RMI", "!0r,[!1r+!2d],!3d" }, + { kX86Imul16RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0x66, 0, 0x69, 0, 0, 0, 0, 2 }, "Imul16RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + + { kX86Imul32RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RRI", "!0r,!1r,!2d" }, + { kX86Imul32RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RMI", "!0r,[!1r+!2d],!3d" }, + { kX86Imul32RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x69, 0, 0, 0, 0, 4 }, "Imul32RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + { kX86Imul32RRI8, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RRI8", "!0r,!1r,!2d" }, + { kX86Imul32RMI8, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" }, + { kX86Imul32RAI8, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + + { kX86Mov8MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8MR", "[!0r+!1d],!2r" }, + { kX86Mov8AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8TR", "fs:[!0d],!1r" }, + { kX86Mov8RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RR", "!0r,!1r" }, + { kX86Mov8RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RM", "!0r,[!1r+!2d]" }, + { kX86Mov8RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Mov8RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0, 0x8A, 0, 0, 0, 0, 0 }, "Mov8RT", "!0r,fs:[!1d]" }, + { kX86Mov8RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB0, 0, 0, 0, 0, 1 }, "Mov8RI", "!0r,!1d" }, + { kX86Mov8MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8MI", "[!0r+!1d],!2d" }, + { kX86Mov8AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov8TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC6, 0, 0, 0, 0, 1 }, "Mov8TI", "fs:[!0d],!1d" }, + + { kX86Mov16MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov16MR", "[!0r+!1d],!2r" }, + { kX86Mov16AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov16AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mov16TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0x66, 0x89, 0, 0, 0, 0, 0 }, "Mov16TR", "fs:[!0d],!1r" }, + { kX86Mov16RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0x66, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RR", "!0r,!1r" }, + { kX86Mov16RM, 
kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0x66, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RM", "!0r,[!1r+!2d]" }, + { kX86Mov16RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0x66, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Mov16RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0x66, 0x8B, 0, 0, 0, 0, 0 }, "Mov16RT", "!0r,fs:[!1d]" }, + { kX86Mov16RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0x66, 0, 0xB8, 0, 0, 0, 0, 2 }, "Mov16RI", "!0r,!1d" }, + { kX86Mov16MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0x66, 0, 0xC7, 0, 0, 0, 0, 2 }, "Mov16MI", "[!0r+!1d],!2d" }, + { kX86Mov16AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0x66, 0, 0xC7, 0, 0, 0, 0, 2 }, "Mov16AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" }, + + { kX86Mov32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" }, + { kX86Mov32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" }, + { kX86Mov32RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RR", "!0r,!1r" }, + { kX86Mov32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" }, + { kX86Mov32RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Mov32RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" }, + { kX86Mov32RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" }, + { kX86Mov32MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", "[!0r+!1d],!2d" }, + { kX86Mov32AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" }, + + { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + +#define SHIFT_ENCODING_MAP(opname, modrm_opcode) \ +{ kX86 ## opname ## 8RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 8MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 8AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 8RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD2, 0, 0, modrm_opcode, 0, 1 }, #opname "8RC", "!0r,cl" }, \ +{ kX86 ## opname ## 8MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD2, 0, 0, modrm_opcode, 0, 1 }, #opname "8MC", "[!0r+!1d],cl" }, \ +{ kX86 ## opname ## 8AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD2, 0, 0, modrm_opcode, 0, 1 }, #opname "8AC", 
"[!0r+!1r<<!2d+!3d],cl" }, \ + \ +{ kX86 ## opname ## 16RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 16MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 16AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "16AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 16RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0, 1 }, #opname "16RC", "!0r,cl" }, \ +{ kX86 ## opname ## 16MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0, 1 }, #opname "16MC", "[!0r+!1d],cl" }, \ +{ kX86 ## opname ## 16AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0x66, 0, 0xD3, 0, 0, modrm_opcode, 0, 1 }, #opname "16AC", "[!0r+!1r<<!2d+!3d],cl" }, \ + \ +{ kX86 ## opname ## 32RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 32MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 32RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32RC", "!0r,cl" }, \ +{ kX86 ## opname ## 32MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32MC", "[!0r+!1d],cl" }, \ +{ kX86 ## opname ## 32AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" } + + SHIFT_ENCODING_MAP(Rol, 0x0), + SHIFT_ENCODING_MAP(Ror, 0x1), + SHIFT_ENCODING_MAP(Rcl, 0x2), + SHIFT_ENCODING_MAP(Rcr, 0x3), + SHIFT_ENCODING_MAP(Sal, 0x4), + SHIFT_ENCODING_MAP(Shr, 0x5), + SHIFT_ENCODING_MAP(Sar, 0x7), +#undef SHIFT_ENCODING_MAP + + { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" }, + + { kX86Test8RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" }, + { kX86Test8MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" }, + { kX86Test8AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test16RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16RI", "!0r,!1d" }, + { kX86Test16MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16MI", "[!0r+!1d],!2d" }, + { kX86Test16AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0x66, 0, 0xF7, 0, 0, 0, 0, 2}, "Test16AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test32RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32RI", "!0r,!1d" }, + { kX86Test32MI, kMemImm, IS_LOAD | 
IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32MI", "[!0r+!1d],!2d" }, + { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test32RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0x85, 0, 0, 0, 0, 0}, "Test32RR", "!0r,!1r" }, + +#define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \ + reg, reg_kind, reg_flags, \ + mem, mem_kind, mem_flags, \ + arr, arr_kind, arr_flags, imm, \ + b_flags, hw_flags, w_flags, \ + b_format, hw_format, w_format) \ +{ kX86 ## opname ## 8 ## reg, reg_kind, reg_flags | b_flags | sets_ccodes, { 0, 0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #reg, #b_format "!0r" }, \ +{ kX86 ## opname ## 8 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | b_flags | sets_ccodes, { 0, 0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #mem, #b_format "[!0r+!1d]" }, \ +{ kX86 ## opname ## 8 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | b_flags | sets_ccodes, { 0, 0, 0xF6, 0, 0, modrm, 0, imm << 0}, #opname "8" #arr, #b_format "[!0r+!1r<<!2d+!3d]" }, \ +{ kX86 ## opname ## 16 ## reg, reg_kind, reg_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #reg, #hw_format "!0r" }, \ +{ kX86 ## opname ## 16 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #mem, #hw_format "[!0r+!1d]" }, \ +{ kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, #hw_format "[!0r+!1r<<!2d+!3d]" }, \ +{ kX86 ## opname ## 32 ## reg, reg_kind, reg_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, #w_format "!0r" }, \ +{ kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, #w_format "[!0r+!1d]" }, \ +{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, #w_format "[!0r+!1r<<!2d+!3d]" } + + UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""), + UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""), + + UNARY_ENCODING_MAP(Mul, 0x4, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA, REG_DEFAD_USEA, "ax,al,", "dx:ax,ax,", "edx:eax,eax,"), + UNARY_ENCODING_MAP(Imul, 0x5, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA, REG_DEFAD_USEA, "ax,al,", "dx:ax,ax,", "edx:eax,eax,"), + UNARY_ENCODING_MAP(Divmod, 0x6, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"), + UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP 
| REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"), +#undef UNARY_ENCODING_MAP + +#define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \ +{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ +{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } + + EXT_0F_ENCODING_MAP(Movsd, 0xF2, 0x10, REG_DEF0), + { kX86MovsdMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdMR", "[!0r+!1d],!2r" }, + { kX86MovsdAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0xF2, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovsdAR", "[!0r+!1r<<!2d+!3d],!4r" }, + + EXT_0F_ENCODING_MAP(Movss, 0xF3, 0x10, REG_DEF0), + { kX86MovssMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssMR", "[!0r+!1d],!2r" }, + { kX86MovssAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0xF3, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovssAR", "[!0r+!1r<<!2d+!3d],!4r" }, + + EXT_0F_ENCODING_MAP(Cvtsi2sd, 0xF2, 0x2A, REG_DEF0), + EXT_0F_ENCODING_MAP(Cvtsi2ss, 0xF3, 0x2A, REG_DEF0), + EXT_0F_ENCODING_MAP(Cvttsd2si, 0xF2, 0x2C, REG_DEF0), + EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C, REG_DEF0), + EXT_0F_ENCODING_MAP(Cvtsd2si, 0xF2, 0x2D, REG_DEF0), + EXT_0F_ENCODING_MAP(Cvtss2si, 0xF3, 0x2D, REG_DEF0), + EXT_0F_ENCODING_MAP(Ucomisd, 0x66, 0x2E, SETS_CCODES), + EXT_0F_ENCODING_MAP(Ucomiss, 0x00, 0x2E, SETS_CCODES), + EXT_0F_ENCODING_MAP(Comisd, 0x66, 0x2F, SETS_CCODES), + EXT_0F_ENCODING_MAP(Comiss, 0x00, 0x2F, SETS_CCODES), + EXT_0F_ENCODING_MAP(Orps, 0x00, 0x56, REG_DEF0), + EXT_0F_ENCODING_MAP(Xorps, 0x00, 0x57, REG_DEF0), + EXT_0F_ENCODING_MAP(Addsd, 0xF2, 0x58, REG_DEF0), + EXT_0F_ENCODING_MAP(Addss, 0xF3, 0x58, REG_DEF0), + EXT_0F_ENCODING_MAP(Mulsd, 0xF2, 0x59, REG_DEF0), + EXT_0F_ENCODING_MAP(Mulss, 0xF3, 0x59, REG_DEF0), + EXT_0F_ENCODING_MAP(Cvtsd2ss, 0xF2, 0x5A, REG_DEF0), + EXT_0F_ENCODING_MAP(Cvtss2sd, 0xF3, 0x5A, REG_DEF0), + EXT_0F_ENCODING_MAP(Subsd, 0xF2, 0x5C, REG_DEF0), + EXT_0F_ENCODING_MAP(Subss, 0xF3, 0x5C, REG_DEF0), + EXT_0F_ENCODING_MAP(Divsd, 0xF2, 0x5E, REG_DEF0), + EXT_0F_ENCODING_MAP(Divss, 0xF3, 0x5E, REG_DEF0), + + { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" }, + { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" }, + + EXT_0F_ENCODING_MAP(Movdxr, 0x66, 0x6E, REG_DEF0), + { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" }, + { kX86MovdrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" }, + { kX86MovdrxAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" }, + + { kX86Set8R, kRegCond, IS_BINARY_OP | REG_DEF0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8R", "!1c !0r" }, + { kX86Set8M, kMemCond, IS_STORE | IS_TERTIARY_OP | REG_USE0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" }, + { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP | REG_USE01 | USES_CCODES, { 0, 0, 
0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" }, + + // TODO: load/store? + // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly. + { kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0 }, "Mfence", "" }, + + EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_DEF0 | SETS_CCODES), + EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_DEF0 | SETS_CCODES), + + { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" }, + { kX86CmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" }, + { kX86CmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86LockCmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "!0r,!1r" }, + { kX86LockCmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1d],!2r" }, + { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" }, + + EXT_0F_ENCODING_MAP(Movzx8, 0x00, 0xB6, REG_DEF0), + EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0), + EXT_0F_ENCODING_MAP(Movsx8, 0x00, 0xBE, REG_DEF0), + EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0), +#undef EXT_0F_ENCODING_MAP + + { kX86Jcc8, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0, 0, 0x70, 0, 0, 0, 0, 0 }, "Jcc8", "!1c !0t" }, + { kX86Jcc32, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0, 0, 0x0F, 0x80, 0, 0, 0, 0 }, "Jcc32", "!1c !0t" }, + { kX86Jmp8, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xEB, 0, 0, 0, 0, 0 }, "Jmp8", "!0t" }, + { kX86Jmp32, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp32", "!0t" }, + { kX86JmpR, kJmp, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xFF, 0, 0, 4, 0, 0 }, "JmpR", "!0r" }, + { kX86CallR, kCall, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "!0r" }, + { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "[!0r+!1d]" }, + { kX86CallA, kCall, IS_QUAD_OP | IS_BRANCH | IS_LOAD | REG_USE01, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" }, + { kX86CallT, kCall, IS_UNARY_OP | IS_BRANCH | IS_LOAD, { THREAD_PREFIX, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallT", "fs:[!0d]" }, + { kX86Ret, kNullary,NO_OPERAND | IS_BRANCH, { 0, 0, 0xC3, 0, 0, 0, 0, 0 }, "Ret", "" }, + + { kX86StartOfMethod, kMacro, IS_UNARY_OP | SETS_CCODES, { 0, 0, 0, 0, 0, 0, 0, 0 }, "StartOfMethod", "!0r" }, + { kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" }, + { kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "PcRelAdr", "!0r,!1d" }, +}; + +static size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) { + size_t size = 0; + if (entry->skeleton.prefix1 > 0) { + ++size; + if (entry->skeleton.prefix2 > 0) { + ++size; + } + } + ++size; // opcode + if (entry->skeleton.opcode == 0x0F) { + ++size; + if (entry->skeleton.extra_opcode1 == 0x38 || 
entry->skeleton.extra_opcode1 == 0x3A) { + ++size; + } + } + ++size; // modrm + if (has_sib || base == rX86_SP) { + // SP requires a SIB byte. + ++size; + } + if (displacement != 0 || base == rBP) { + // BP requires an explicit displacement, even when it's 0. + if (entry->opcode != kX86Lea32RA) { + DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name; + } + size += IS_SIMM8(displacement) ? 1 : 4; + } + size += entry->skeleton.immediate_bytes; + return size; +} + +int X86Mir2Lir::GetInsnSize(LIR* lir) { + const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode]; + switch (entry->kind) { + case kData: + return 4; // 4 bytes of data + case kNop: + return lir->operands[0]; // length of nop is sole operand + case kNullary: + return 1; // 1 byte of opcode + case kReg: // lir operands - 0: reg + return ComputeSize(entry, 0, 0, false); + case kMem: // lir operands - 0: base, 1: disp + return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp + return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + case kMemReg: // lir operands - 0: base, 1: disp, 2: reg + return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg + return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + case kThreadReg: // lir operands - 0: disp, 1: reg + return ComputeSize(entry, 0, lir->operands[0], false); + case kRegReg: + return ComputeSize(entry, 0, 0, false); + case kRegRegStore: + return ComputeSize(entry, 0, 0, false); + case kRegMem: // lir operands - 0: reg, 1: base, 2: disp + return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp + return ComputeSize(entry, lir->operands[1], lir->operands[4], true); + case kRegThread: // lir operands - 0: reg, 1: disp + return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + case kRegImm: { // lir operands - 0: reg, 1: immediate + size_t size = ComputeSize(entry, 0, 0, false); + if (entry->skeleton.ax_opcode == 0) { + return size; + } else { + // AX opcodes don't require the modrm byte. + int reg = lir->operands[0]; + return size - (reg == rAX ? 1 : 0); + } + } + case kMemImm: // lir operands - 0: base, 1: disp, 2: immediate + return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + case kArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate + return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + case kThreadImm: // lir operands - 0: disp, 1: imm + return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + case kRegRegImm: // lir operands - 0: reg, 1: reg, 2: imm + return ComputeSize(entry, 0, 0, false); + case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm + return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + case kRegArrayImm: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm + return ComputeSize(entry, lir->operands[1], lir->operands[4], true); + case kMovRegImm: // lir operands - 0: reg, 1: immediate + return 1 + entry->skeleton.immediate_bytes; + case kShiftRegImm: // lir operands - 0: reg, 1: immediate + // Shift by immediate one has a shorter opcode. + return ComputeSize(entry, 0, 0, false) - (lir->operands[1] == 1 ? 
1 : 0); + case kShiftMemImm: // lir operands - 0: base, 1: disp, 2: immediate + // Shift by immediate one has a shorter opcode. + return ComputeSize(entry, lir->operands[0], lir->operands[1], false) - + (lir->operands[2] == 1 ? 1 : 0); + case kShiftArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate + // Shift by immediate one has a shorter opcode. + return ComputeSize(entry, lir->operands[0], lir->operands[3], true) - + (lir->operands[4] == 1 ? 1 : 0); + case kShiftRegCl: + return ComputeSize(entry, 0, 0, false); + case kShiftMemCl: // lir operands - 0: base, 1: disp, 2: cl + return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + case kShiftArrayCl: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg + return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + case kRegCond: // lir operands - 0: reg, 1: cond + return ComputeSize(entry, 0, 0, false); + case kMemCond: // lir operands - 0: base, 1: disp, 2: cond + return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + case kArrayCond: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond + return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + case kJcc: + if (lir->opcode == kX86Jcc8) { + return 2; // opcode + rel8 + } else { + DCHECK(lir->opcode == kX86Jcc32); + return 6; // 2 byte opcode + rel32 + } + case kJmp: + if (lir->opcode == kX86Jmp8) { + return 2; // opcode + rel8 + } else if (lir->opcode == kX86Jmp32) { + return 5; // opcode + rel32 + } else { + DCHECK(lir->opcode == kX86JmpR); + return 2; // opcode + modrm + } + case kCall: + switch (lir->opcode) { + case kX86CallR: return 2; // opcode modrm + case kX86CallM: // lir operands - 0: base, 1: disp + return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + case kX86CallA: // lir operands - 0: base, 1: index, 2: scale, 3: disp + return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + case kX86CallT: // lir operands - 0: disp + return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + default: + break; + } + break; + case kPcRel: + if (entry->opcode == kX86PcRelLoadRA) { + // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table + return ComputeSize(entry, lir->operands[1], 0x12345678, true); + } else { + DCHECK(entry->opcode == kX86PcRelAdr); + return 5; // opcode with reg + 4 byte immediate + } + case kMacro: + DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); + return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + + ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, false) - + (lir->operands[0] == rAX ? 
1 : 0); // shorter ax encoding + default: + break; + } + UNIMPLEMENTED(FATAL) << "Unimplemented size encoding for: " << entry->name; + return 0; +} + +static uint8_t ModrmForDisp(int base, int disp) { + // BP requires an explicit disp, so do not omit it in the 0 case + if (disp == 0 && base != rBP) { + return 0; + } else if (IS_SIMM8(disp)) { + return 1; + } else { + return 2; + } +} + +void X86Mir2Lir::EmitDisp(int base, int disp) { + // BP requires an explicit disp, so do not omit it in the 0 case + if (disp == 0 && base != rBP) { + return; + } else if (IS_SIMM8(disp)) { + code_buffer_.push_back(disp & 0xFF); + } else { + code_buffer_.push_back(disp & 0xFF); + code_buffer_.push_back((disp >> 8) & 0xFF); + code_buffer_.push_back((disp >> 16) & 0xFF); + code_buffer_.push_back((disp >> 24) & 0xFF); + } +} + +void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + if (X86_FPREG(reg)) { + reg = reg & X86_FP_REG_MASK; + } + if (reg >= 4) { + DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(reg) + << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } + DCHECK_LT(reg, 8); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + code_buffer_.push_back(modrm); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + DCHECK_LT(entry->skeleton.modrm_opcode, 8); + DCHECK_LT(base, 8); + uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base; + code_buffer_.push_back(modrm); + EmitDisp(base, disp); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, + uint8_t base, int disp, uint8_t reg) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, 
entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + if (X86_FPREG(reg)) { + reg = reg & X86_FP_REG_MASK; + } + if (reg >= 4) { + DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(reg) + << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } + DCHECK_LT(reg, 8); + DCHECK_LT(base, 8); + uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg << 3) | base; + code_buffer_.push_back(modrm); + if (base == rX86_SP) { + // Special SIB for SP base + code_buffer_.push_back(0 << 6 | (rX86_SP << 3) | rX86_SP); + } + EmitDisp(base, disp); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, + uint8_t reg, uint8_t base, int disp) { + // Opcode will flip operands. + EmitMemReg(entry, base, disp, reg); +} + +void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index, + int scale, int disp) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + if (X86_FPREG(reg)) { + reg = reg & X86_FP_REG_MASK; + } + DCHECK_LT(reg, 8); + uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg << 3) | rX86_SP; + code_buffer_.push_back(modrm); + DCHECK_LT(scale, 4); + DCHECK_LT(index, 8); + DCHECK_LT(base, 8); + uint8_t sib = (scale << 6) | (index << 3) | base; + code_buffer_.push_back(sib); + EmitDisp(base, disp); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp, + uint8_t reg) { + // Opcode will flip operands. 
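  // Illustrative note: the load (RM/RA) and store (MR/AR) forms share the same
  // ModRM/SIB layout and differ only in the opcode byte recorded in the
  // encoding map (e.g. MovsdRA uses 0x10 while MovsdAR uses 0x11), so the
  // store emitter can simply reuse the load emitter with the operands swapped.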
+ EmitRegArray(entry, reg, base, index, scale, disp); +} + +void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) { + DCHECK_NE(entry->skeleton.prefix1, 0); + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + if (X86_FPREG(reg)) { + reg = reg & X86_FP_REG_MASK; + } + if (reg >= 4) { + DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(reg) + << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } + DCHECK_LT(reg, 8); + uint8_t modrm = (0 << 6) | (reg << 3) | rBP; + code_buffer_.push_back(modrm); + code_buffer_.push_back(disp & 0xFF); + code_buffer_.push_back((disp >> 8) & 0xFF); + code_buffer_.push_back((disp >> 16) & 0xFF); + code_buffer_.push_back((disp >> 24) & 0xFF); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + if (X86_FPREG(reg1)) { + reg1 = reg1 & X86_FP_REG_MASK; + } + if (X86_FPREG(reg2)) { + reg2 = reg2 & X86_FP_REG_MASK; + } + DCHECK_LT(reg1, 8); + DCHECK_LT(reg2, 8); + uint8_t modrm = (3 << 6) | (reg1 << 3) | reg2; + code_buffer_.push_back(modrm); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, + uint8_t reg1, uint8_t reg2, int32_t imm) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + if (X86_FPREG(reg1)) { + reg1 = reg1 & X86_FP_REG_MASK; + } + if (X86_FPREG(reg2)) { + reg2 = reg2 & 
X86_FP_REG_MASK; + } + DCHECK_LT(reg1, 8); + DCHECK_LT(reg2, 8); + uint8_t modrm = (3 << 6) | (reg1 << 3) | reg2; + code_buffer_.push_back(modrm); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + switch (entry->skeleton.immediate_bytes) { + case 1: + DCHECK(IS_SIMM8(imm)); + code_buffer_.push_back(imm & 0xFF); + break; + case 2: + DCHECK(IS_SIMM16(imm)); + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + break; + case 4: + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); + break; + default: + LOG(FATAL) << "Unexpected immediate bytes (" << entry->skeleton.immediate_bytes + << ") for instruction: " << entry->name; + break; + } +} + +void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + if (reg == rAX && entry->skeleton.ax_opcode != 0) { + code_buffer_.push_back(entry->skeleton.ax_opcode); + } else { + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + if (X86_FPREG(reg)) { + reg = reg & X86_FP_REG_MASK; + } + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + code_buffer_.push_back(modrm); + } + switch (entry->skeleton.immediate_bytes) { + case 1: + DCHECK(IS_SIMM8(imm)); + code_buffer_.push_back(imm & 0xFF); + break; + case 2: + DCHECK(IS_SIMM16(imm)); + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + break; + case 4: + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); + break; + default: + LOG(FATAL) << "Unexpected immediate bytes (" << entry->skeleton.immediate_bytes + << ") for instruction: " << entry->name; + break; + } +} + +void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | rBP; + code_buffer_.push_back(modrm); + code_buffer_.push_back(disp & 0xFF); + code_buffer_.push_back((disp >> 8) & 0xFF); + code_buffer_.push_back((disp >> 16) & 0xFF); + 
code_buffer_.push_back((disp >> 24) & 0xFF); + switch (entry->skeleton.immediate_bytes) { + case 1: + DCHECK(IS_SIMM8(imm)); + code_buffer_.push_back(imm & 0xFF); + break; + case 2: + DCHECK(IS_SIMM16(imm)); + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + break; + case 4: + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); + break; + default: + LOG(FATAL) << "Unexpected immediate bytes (" << entry->skeleton.immediate_bytes + << ") for instruction: " << entry->name; + break; + } + DCHECK_EQ(entry->skeleton.ax_opcode, 0); +} + +void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { + DCHECK_LT(reg, 8); + code_buffer_.push_back(0xB8 + reg); + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); +} + +void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + if (imm != 1) { + code_buffer_.push_back(entry->skeleton.opcode); + } else { + // Shorter encoding for 1 bit shift + code_buffer_.push_back(entry->skeleton.ax_opcode); + } + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + if (reg >= 4) { + DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(reg) + << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } + DCHECK_LT(reg, 8); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + code_buffer_.push_back(modrm); + if (imm != 1) { + DCHECK_EQ(entry->skeleton.immediate_bytes, 1); + DCHECK(IS_SIMM8(imm)); + code_buffer_.push_back(imm & 0xFF); + } +} + +void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl) { + DCHECK_EQ(cl, static_cast<uint8_t>(rCX)); + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + DCHECK_LT(reg, 8); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + code_buffer_.push_back(modrm); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0x0F, entry->skeleton.opcode); + code_buffer_.push_back(0x0F); + 
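  // SETcc is the two-byte opcode 0F 90+cc, so the condition is OR'd into the
  // second opcode byte below. Illustrative encoding, assuming the "equal"
  // condition uses the standard x86 tttn value 0x4 and rAX is register 0:
  //   sete al  ->  0F 94 C0   (modrm = 11 000 000).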
DCHECK_EQ(0x90, entry->skeleton.extra_opcode1); + code_buffer_.push_back(0x90 | condition); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + DCHECK_LT(reg, 8); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + code_buffer_.push_back(modrm); + DCHECK_EQ(entry->skeleton.immediate_bytes, 0); +} + +void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) { + if (entry->opcode == kX86Jmp8) { + DCHECK(IS_SIMM8(rel)); + code_buffer_.push_back(0xEB); + code_buffer_.push_back(rel & 0xFF); + } else if (entry->opcode == kX86Jmp32) { + code_buffer_.push_back(0xE9); + code_buffer_.push_back(rel & 0xFF); + code_buffer_.push_back((rel >> 8) & 0xFF); + code_buffer_.push_back((rel >> 16) & 0xFF); + code_buffer_.push_back((rel >> 24) & 0xFF); + } else { + DCHECK(entry->opcode == kX86JmpR); + code_buffer_.push_back(entry->skeleton.opcode); + uint8_t reg = static_cast<uint8_t>(rel); + DCHECK_LT(reg, 8); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + code_buffer_.push_back(modrm); + } +} + +void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc) { + DCHECK_LT(cc, 16); + if (entry->opcode == kX86Jcc8) { + DCHECK(IS_SIMM8(rel)); + code_buffer_.push_back(0x70 | cc); + code_buffer_.push_back(rel & 0xFF); + } else { + DCHECK(entry->opcode == kX86Jcc32); + code_buffer_.push_back(0x0F); + code_buffer_.push_back(0x80 | cc); + code_buffer_.push_back(rel & 0xFF); + code_buffer_.push_back((rel >> 8) & 0xFF); + code_buffer_.push_back((rel >> 16) & 0xFF); + code_buffer_.push_back((rel >> 24) & 0xFF); + } +} + +void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base; + code_buffer_.push_back(modrm); + if (base == rX86_SP) { + // Special SIB for SP base + code_buffer_.push_back(0 << 6 | (rX86_SP << 3) | rX86_SP); + } + EmitDisp(base, disp); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) { + DCHECK_NE(entry->skeleton.prefix1, 0); + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | rBP; + 
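  // With mod = 00 and r/m = rBP (101), 32-bit x86 treats the operand as a bare
  // disp32 with no base register, which is why four displacement bytes always
  // follow. Together with the segment-override prefix from the CallT entry
  // (the "fs:[!0d]" format suggests THREAD_PREFIX is the FS prefix, 0x64) and
  // modrm_opcode 2 (FF /2, call r/m32), a call through a hypothetical Thread
  // slot at fs-offset 0x1A4 would encode as 64 FF 15 A4 01 00 00.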
code_buffer_.push_back(modrm); + code_buffer_.push_back(disp & 0xFF); + code_buffer_.push_back((disp >> 8) & 0xFF); + code_buffer_.push_back((disp >> 16) & 0xFF); + code_buffer_.push_back((disp >> 24) & 0xFF); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, + int base_or_table, uint8_t index, int scale, int table_or_disp) { + int disp; + if (entry->opcode == kX86PcRelLoadRA) { + Mir2Lir::SwitchTable *tab_rec = reinterpret_cast<Mir2Lir::SwitchTable*>(table_or_disp); + disp = tab_rec->offset; + } else { + DCHECK(entry->opcode == kX86PcRelAdr); + Mir2Lir::FillArrayData *tab_rec = reinterpret_cast<Mir2Lir::FillArrayData*>(base_or_table); + disp = tab_rec->offset; + } + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + if (X86_FPREG(reg)) { + reg = reg & X86_FP_REG_MASK; + } + DCHECK_LT(reg, 8); + if (entry->opcode == kX86PcRelLoadRA) { + code_buffer_.push_back(entry->skeleton.opcode); + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + uint8_t modrm = (2 << 6) | (reg << 3) | rX86_SP; + code_buffer_.push_back(modrm); + DCHECK_LT(scale, 4); + DCHECK_LT(index, 8); + DCHECK_LT(base_or_table, 8); + uint8_t base = static_cast<uint8_t>(base_or_table); + uint8_t sib = (scale << 6) | (index << 3) | base; + code_buffer_.push_back(sib); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); + } else { + code_buffer_.push_back(entry->skeleton.opcode + reg); + } + code_buffer_.push_back(disp & 0xFF); + code_buffer_.push_back((disp >> 8) & 0xFF); + code_buffer_.push_back((disp >> 16) & 0xFF); + code_buffer_.push_back((disp >> 24) & 0xFF); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); +} + +void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) { + DCHECK(entry->opcode == kX86StartOfMethod) << entry->name; + code_buffer_.push_back(0xE8); // call +0 + code_buffer_.push_back(0); + code_buffer_.push_back(0); + code_buffer_.push_back(0); + code_buffer_.push_back(0); + + DCHECK_LT(reg, 8); + code_buffer_.push_back(0x58 + reg); // pop reg + + EmitRegImm(&X86Mir2Lir::EncodingMap[kX86Sub32RI], reg, offset + 5 /* size of call +0 */); +} + +void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { + UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " " + << BuildInsnString(entry->fmt, lir, 0); + for (int i = 0; i < GetInsnSize(lir); ++i) { + code_buffer_.push_back(0xCC); // push breakpoint instruction - int 3 + } +} + +/* + * Assemble the LIR into binary instruction format. Note that we may + * discover that pc-relative displacements may not fit the selected + * instruction. In those cases we will try to substitute a new code + * sequence or request that the trace be shortened and retried. 
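 * For example, when a kX86Jcc8 ends up with a target outside rel8 range, the
 * fixup pass below rewrites it as kX86Jcc32 and returns kRetryAll so assembly
 * is redone with the now-shifted offsets.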
+ */ +AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { + LIR *lir; + AssemblerStatus res = kSuccess; // Assume success + + const bool kVerbosePcFixup = false; + for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + if (lir->opcode < 0) { + continue; + } + + if (lir->flags.is_nop) { + continue; + } + + if (lir->flags.pcRelFixup) { + switch (lir->opcode) { + case kX86Jcc8: { + LIR *target_lir = lir->target; + DCHECK(target_lir != NULL); + int delta = 0; + uintptr_t pc; + if (IS_SIMM8(lir->operands[0])) { + pc = lir->offset + 2 /* opcode + rel8 */; + } else { + pc = lir->offset + 6 /* 2 byte opcode + rel32 */; + } + uintptr_t target = target_lir->offset; + delta = target - pc; + if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) { + if (kVerbosePcFixup) { + LOG(INFO) << "Retry for JCC growth at " << lir->offset + << " delta: " << delta << " old delta: " << lir->operands[0]; + } + lir->opcode = kX86Jcc32; + SetupResourceMasks(lir); + res = kRetryAll; + } + if (kVerbosePcFixup) { + LOG(INFO) << "Source:"; + DumpLIRInsn(lir, 0); + LOG(INFO) << "Target:"; + DumpLIRInsn(target_lir, 0); + LOG(INFO) << "Delta " << delta; + } + lir->operands[0] = delta; + break; + } + case kX86Jcc32: { + LIR *target_lir = lir->target; + DCHECK(target_lir != NULL); + uintptr_t pc = lir->offset + 6 /* 2 byte opcode + rel32 */; + uintptr_t target = target_lir->offset; + int delta = target - pc; + if (kVerbosePcFixup) { + LOG(INFO) << "Source:"; + DumpLIRInsn(lir, 0); + LOG(INFO) << "Target:"; + DumpLIRInsn(target_lir, 0); + LOG(INFO) << "Delta " << delta; + } + lir->operands[0] = delta; + break; + } + case kX86Jmp8: { + LIR *target_lir = lir->target; + DCHECK(target_lir != NULL); + int delta = 0; + uintptr_t pc; + if (IS_SIMM8(lir->operands[0])) { + pc = lir->offset + 2 /* opcode + rel8 */; + } else { + pc = lir->offset + 5 /* opcode + rel32 */; + } + uintptr_t target = target_lir->offset; + delta = target - pc; + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && delta == 0) { + // Useless branch + lir->flags.is_nop = true; + if (kVerbosePcFixup) { + LOG(INFO) << "Retry for useless branch at " << lir->offset; + } + res = kRetryAll; + } else if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) { + if (kVerbosePcFixup) { + LOG(INFO) << "Retry for JMP growth at " << lir->offset; + } + lir->opcode = kX86Jmp32; + SetupResourceMasks(lir); + res = kRetryAll; + } + lir->operands[0] = delta; + break; + } + case kX86Jmp32: { + LIR *target_lir = lir->target; + DCHECK(target_lir != NULL); + uintptr_t pc = lir->offset + 5 /* opcode + rel32 */; + uintptr_t target = target_lir->offset; + int delta = target - pc; + lir->operands[0] = delta; + break; + } + default: + break; + } + } + + /* + * If one of the pc-relative instructions expanded we'll have + * to make another pass. Don't bother to fully assemble the + * instruction. 
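 * (Once no retry is pending, lir->offset must match the number of bytes
 * emitted so far, which the CHECK_EQ below enforces.)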
+ */ + if (res != kSuccess) { + continue; + } + CHECK_EQ(static_cast<size_t>(lir->offset), code_buffer_.size()); + const X86EncodingMap *entry = &X86Mir2Lir::EncodingMap[lir->opcode]; + size_t starting_cbuf_size = code_buffer_.size(); + switch (entry->kind) { + case kData: // 4 bytes of data + code_buffer_.push_back(lir->operands[0]); + break; + case kNullary: // 1 byte of opcode + DCHECK_EQ(0, entry->skeleton.prefix1); + DCHECK_EQ(0, entry->skeleton.prefix2); + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.extra_opcode1 != 0) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + if (entry->skeleton.extra_opcode2 != 0) { + code_buffer_.push_back(entry->skeleton.extra_opcode2); + } + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); + break; + case kReg: // lir operands - 0: reg + EmitOpReg(entry, lir->operands[0]); + break; + case kMem: // lir operands - 0: base, 1: disp + EmitOpMem(entry, lir->operands[0], lir->operands[1]); + break; + case kMemReg: // lir operands - 0: base, 1: disp, 2: reg + EmitMemReg(entry, lir->operands[0], lir->operands[1], lir->operands[2]); + break; + case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg + EmitArrayReg(entry, lir->operands[0], lir->operands[1], lir->operands[2], + lir->operands[3], lir->operands[4]); + break; + case kRegMem: // lir operands - 0: reg, 1: base, 2: disp + EmitRegMem(entry, lir->operands[0], lir->operands[1], lir->operands[2]); + break; + case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp + EmitRegArray(entry, lir->operands[0], lir->operands[1], lir->operands[2], + lir->operands[3], lir->operands[4]); + break; + case kRegThread: // lir operands - 0: reg, 1: disp + EmitRegThread(entry, lir->operands[0], lir->operands[1]); + break; + case kRegReg: // lir operands - 0: reg1, 1: reg2 + EmitRegReg(entry, lir->operands[0], lir->operands[1]); + break; + case kRegRegStore: // lir operands - 0: reg2, 1: reg1 + EmitRegReg(entry, lir->operands[1], lir->operands[0]); + break; + case kRegRegImm: + EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]); + break; + case kRegImm: // lir operands - 0: reg, 1: immediate + EmitRegImm(entry, lir->operands[0], lir->operands[1]); + break; + case kThreadImm: // lir operands - 0: disp, 1: immediate + EmitThreadImm(entry, lir->operands[0], lir->operands[1]); + break; + case kMovRegImm: // lir operands - 0: reg, 1: immediate + EmitMovRegImm(entry, lir->operands[0], lir->operands[1]); + break; + case kShiftRegImm: // lir operands - 0: reg, 1: immediate + EmitShiftRegImm(entry, lir->operands[0], lir->operands[1]); + break; + case kShiftRegCl: // lir operands - 0: reg, 1: cl + EmitShiftRegCl(entry, lir->operands[0], lir->operands[1]); + break; + case kRegCond: // lir operands - 0: reg, 1: condition + EmitRegCond(entry, lir->operands[0], lir->operands[1]); + break; + case kJmp: // lir operands - 0: rel + EmitJmp(entry, lir->operands[0]); + break; + case kJcc: // lir operands - 0: rel, 1: CC, target assigned + EmitJcc(entry, lir->operands[0], lir->operands[1]); + break; + case kCall: + switch (entry->opcode) { + case kX86CallM: // lir operands - 0: base, 1: disp + EmitCallMem(entry, lir->operands[0], lir->operands[1]); + break; + case kX86CallT: // lir operands - 0: disp + EmitCallThread(entry, lir->operands[0]); + break; + default: + EmitUnimplemented(entry, lir); + 
break; + } + break; + case kPcRel: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table + EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2], + lir->operands[3], lir->operands[4]); + break; + case kMacro: + EmitMacro(entry, lir->operands[0], lir->offset); + break; + default: + EmitUnimplemented(entry, lir); + break; + } + CHECK_EQ(static_cast<size_t>(GetInsnSize(lir)), + code_buffer_.size() - starting_cbuf_size) + << "Instruction size mismatch for entry: " << X86Mir2Lir::EncodingMap[lir->opcode].name; + } + return res; +} + +} // namespace art diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc new file mode 100644 index 0000000000..d60be72c31 --- /dev/null +++ b/compiler/dex/quick/x86/call_x86.cc @@ -0,0 +1,283 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains codegen for the X86 ISA */ + +#include "codegen_x86.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "x86_lir.h" + +namespace art { + +void X86Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, + SpecialCaseHandler special_case) +{ + // TODO +} + +/* + * The sparse table in the literal pool is an array of <key,displacement> + * pairs. + */ +void X86Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) +{ + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpSparseSwitchTable(table); + } + int entries = table[1]; + const int* keys = reinterpret_cast<const int*>(&table[2]); + const int* targets = &keys[entries]; + rl_src = LoadValue(rl_src, kCoreReg); + for (int i = 0; i < entries; i++) { + int key = keys[i]; + BasicBlock* case_block = + mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]); + OpCmpImmBranch(kCondEq, rl_src.low_reg, key, + &block_label_list_[case_block->id]); + } +} + +/* + * Code pattern will look something like: + * + * mov r_val, .. + * call 0 + * pop r_start_of_method + * sub r_start_of_method, .. 
+ * mov r_key_reg, r_val + * sub r_key_reg, low_key + * cmp r_key_reg, size-1 ; bound check + * ja done + * mov r_disp, [r_start_of_method + r_key_reg * 4 + table_offset] + * add r_start_of_method, r_disp + * jmp r_start_of_method + * done: + */ +void X86Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, + RegLocation rl_src) +{ + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + if (cu_->verbose) { + DumpPackedSwitchTable(table); + } + // Add the table to the list - we'll process it later + SwitchTable *tab_rec = + static_cast<SwitchTable *>(arena_->NewMem(sizeof(SwitchTable), true, + ArenaAllocator::kAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + int size = table[1]; + tab_rec->targets = static_cast<LIR**>(arena_->NewMem(size * sizeof(LIR*), true, + ArenaAllocator::kAllocLIR)); + switch_tables_.Insert(tab_rec); + + // Get the switch value + rl_src = LoadValue(rl_src, kCoreReg); + int start_of_method_reg = AllocTemp(); + // Materialize a pointer to the switch table + //NewLIR0(kX86Bkpt); + NewLIR1(kX86StartOfMethod, start_of_method_reg); + int low_key = s4FromSwitchData(&table[2]); + int keyReg; + // Remove the bias, if necessary + if (low_key == 0) { + keyReg = rl_src.low_reg; + } else { + keyReg = AllocTemp(); + OpRegRegImm(kOpSub, keyReg, rl_src.low_reg, low_key); + } + // Bounds check - if < 0 or >= size continue following switch + OpRegImm(kOpCmp, keyReg, size-1); + LIR* branch_over = OpCondBranch(kCondHi, NULL); + + // Load the displacement from the switch table + int disp_reg = AllocTemp(); + NewLIR5(kX86PcRelLoadRA, disp_reg, start_of_method_reg, keyReg, 2, + reinterpret_cast<uintptr_t>(tab_rec)); + // Add displacement to start of method + OpRegReg(kOpAdd, start_of_method_reg, disp_reg); + // ..and go! + LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg); + tab_rec->anchor = switch_branch; + + /* branch_over target here */ + LIR* target = NewLIR0(kPseudoTargetLabel); + branch_over->target = target; +} + +/* + * Array data table format: + * ushort ident = 0x0300 magic value + * ushort width width of each element in the table + * uint size number of elements in the table + * ubyte data[size*width] table of data values (may contain a single-byte + * padding at the end) + * + * Total size is 4+(width * size + 1)/2 16-bit code units. 
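 * For example, a fill-array-data payload of five 4-byte ints has width 4 and
 * size 5, so it occupies 4 + (4*5 + 1)/2 = 14 code units: 8 header bytes plus
 * 20 data bytes, with the +1 rounding odd byte counts up to a 16-bit unit.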
+ */ +void X86Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) +{ + const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; + // Add the table to the list - we'll process it later + FillArrayData *tab_rec = + static_cast<FillArrayData*>(arena_->NewMem(sizeof(FillArrayData), true, + ArenaAllocator::kAllocData)); + tab_rec->table = table; + tab_rec->vaddr = current_dalvik_offset_; + uint16_t width = tab_rec->table[1]; + uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16); + tab_rec->size = (size * width) + 8; + + fill_array_data_.Insert(tab_rec); + + // Making a call - use explicit registers + FlushAllRegs(); /* Everything to home location */ + LoadValueDirectFixed(rl_src, rX86_ARG0); + // Materialize a pointer to the fill data image + NewLIR1(kX86StartOfMethod, rX86_ARG2); + NewLIR2(kX86PcRelAdr, rX86_ARG1, reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR2(kX86Add32RR, rX86_ARG1, rX86_ARG2); + CallRuntimeHelperRegReg(ENTRYPOINT_OFFSET(pHandleFillArrayDataFromCode), rX86_ARG0, + rX86_ARG1, true); +} + +void X86Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) +{ + FlushAllRegs(); + LoadValueDirectFixed(rl_src, rCX); // Get obj + LockCallTemps(); // Prepare for explicit register usage + GenNullCheck(rl_src.s_reg_low, rCX, opt_flags); + // If lock is unheld, try to grab it quickly with compare and exchange + // TODO: copy and clear hash state? + NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value()); + NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT); + NewLIR2(kX86Xor32RR, rAX, rAX); + NewLIR3(kX86LockCmpxchgMR, rCX, mirror::Object::MonitorOffset().Int32Value(), rDX); + LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); + // If lock is held, go the expensive route - artLockObjectFromCode(self, obj); + CallRuntimeHelperReg(ENTRYPOINT_OFFSET(pLockObjectFromCode), rCX, true); + branch->target = NewLIR0(kPseudoTargetLabel); +} + +void X86Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) +{ + FlushAllRegs(); + LoadValueDirectFixed(rl_src, rAX); // Get obj + LockCallTemps(); // Prepare for explicit register usage + GenNullCheck(rl_src.s_reg_low, rAX, opt_flags); + // If lock is held by the current thread, clear it to quickly release it + // TODO: clear hash state? + NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value()); + NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT); + NewLIR3(kX86Mov32RM, rCX, rAX, mirror::Object::MonitorOffset().Int32Value()); + OpRegReg(kOpSub, rCX, rDX); + LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + NewLIR3(kX86Mov32MR, rAX, mirror::Object::MonitorOffset().Int32Value(), rCX); + LIR* branch2 = NewLIR1(kX86Jmp8, 0); + branch->target = NewLIR0(kPseudoTargetLabel); + // Otherwise, go the expensive route - UnlockObjectFromCode(obj); + CallRuntimeHelperReg(ENTRYPOINT_OFFSET(pUnlockObjectFromCode), rAX, true); + branch2->target = NewLIR0(kPseudoTargetLabel); +} + +void X86Mir2Lir::GenMoveException(RegLocation rl_dest) +{ + int ex_offset = Thread::ExceptionOffset().Int32Value(); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + NewLIR2(kX86Mov32RT, rl_result.low_reg, ex_offset); + NewLIR2(kX86Mov32TI, ex_offset, 0); + StoreValue(rl_dest, rl_result); +} + +/* + * Mark garbage collection card. Skip if the value we're storing is null. 
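 * The card index is tgt_addr >> kCardShift, and the byte stored at
 * card_base + index is the low byte of the card-table base itself, which the
 * runtime arranges to equal the dirty-card marker; hence reg_card_base is
 * reused below as both the base address and the value stored.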
+ */ +void X86Mir2Lir::MarkGCCard(int val_reg, int tgt_addr_reg) +{ + int reg_card_base = AllocTemp(); + int reg_card_no = AllocTemp(); + LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL); + NewLIR2(kX86Mov32RT, reg_card_base, Thread::CardTableOffset().Int32Value()); + OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift); + StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, + kUnsignedByte); + LIR* target = NewLIR0(kPseudoTargetLabel); + branch_over->target = target; + FreeTemp(reg_card_base); + FreeTemp(reg_card_no); +} + +void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) +{ + /* + * On entry, rX86_ARG0, rX86_ARG1, rX86_ARG2 are live. Let the register + * allocation mechanism know so it doesn't try to use any of them when + * expanding the frame or flushing. This leaves the utility + * code with no spare temps. + */ + LockTemp(rX86_ARG0); + LockTemp(rX86_ARG1); + LockTemp(rX86_ARG2); + + /* Build frame, return address already on stack */ + OpRegImm(kOpSub, rX86_SP, frame_size_ - 4); + + /* + * We can safely skip the stack overflow check if we're + * a leaf *and* our frame size < fudge factor. + */ + bool skip_overflow_check = (mir_graph_->MethodIsLeaf() && + (static_cast<size_t>(frame_size_) < + Thread::kStackOverflowReservedBytes)); + NewLIR0(kPseudoMethodEntry); + /* Spill core callee saves */ + SpillCoreRegs(); + /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */ + DCHECK_EQ(num_fp_spills_, 0); + if (!skip_overflow_check) { + // cmp rX86_SP, fs:[stack_end_]; jcc throw_launchpad + LIR* tgt = RawLIR(0, kPseudoThrowTarget, kThrowStackOverflow, 0, 0, 0, 0); + OpRegThreadMem(kOpCmp, rX86_SP, Thread::StackEndOffset().Int32Value()); + OpCondBranch(kCondUlt, tgt); + // Remember branch target - will process later + throw_launchpads_.Insert(tgt); + } + + FlushIns(ArgLocs, rl_method); + + FreeTemp(rX86_ARG0); + FreeTemp(rX86_ARG1); + FreeTemp(rX86_ARG2); +} + +void X86Mir2Lir::GenExitSequence() { + /* + * In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't + * allocated by the register utilities as temps. + */ + LockTemp(rX86_RET0); + LockTemp(rX86_RET1); + + NewLIR0(kPseudoMethodExit); + UnSpillCoreRegs(); + /* Remove frame except for return address */ + OpRegImm(kOpAdd, rX86_SP, frame_size_ - 4); + NewLIR0(kX86Ret); +} + +} // namespace art diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h new file mode 100644 index 0000000000..3e30141594 --- /dev/null +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -0,0 +1,205 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_SRC_COMPILER_DEX_QUICK_X86_CODEGENX86_H_ +#define ART_SRC_COMPILER_DEX_QUICK_X86_CODEGENX86_H_ + +#include "dex/compiler_internals.h" +#include "x86_lir.h" + +namespace art { + +class X86Mir2Lir : public Mir2Lir { + public: + + X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); + + // Required for target - codegen helpers. + bool SmallLiteralDivide(Instruction::Code dalvik_opcode, RegLocation rl_src, + RegLocation rl_dest, int lit); + int LoadHelper(int offset); + LIR* LoadBaseDisp(int rBase, int displacement, int r_dest, OpSize size, int s_reg); + LIR* LoadBaseDispWide(int rBase, int displacement, int r_dest_lo, int r_dest_hi, + int s_reg); + LIR* LoadBaseIndexed(int rBase, int r_index, int r_dest, int scale, OpSize size); + LIR* LoadBaseIndexedDisp(int rBase, int r_index, int scale, int displacement, + int r_dest, int r_dest_hi, OpSize size, int s_reg); + LIR* LoadConstantNoClobber(int r_dest, int value); + LIR* LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value); + LIR* StoreBaseDisp(int rBase, int displacement, int r_src, OpSize size); + LIR* StoreBaseDispWide(int rBase, int displacement, int r_src_lo, int r_src_hi); + LIR* StoreBaseIndexed(int rBase, int r_index, int r_src, int scale, OpSize size); + LIR* StoreBaseIndexedDisp(int rBase, int r_index, int scale, int displacement, + int r_src, int r_src_hi, OpSize size, int s_reg); + void MarkGCCard(int val_reg, int tgt_addr_reg); + + // Required for target - register utilities. + bool IsFpReg(int reg); + bool SameRegType(int reg1, int reg2); + int AllocTypedTemp(bool fp_hint, int reg_class); + int AllocTypedTempPair(bool fp_hint, int reg_class); + int S2d(int low_reg, int high_reg); + int TargetReg(SpecialTargetRegister reg); + RegisterInfo* GetRegInfo(int reg); + RegLocation GetReturnAlt(); + RegLocation GetReturnWideAlt(); + RegLocation LocCReturn(); + RegLocation LocCReturnDouble(); + RegLocation LocCReturnFloat(); + RegLocation LocCReturnWide(); + uint32_t FpRegMask(); + uint64_t GetRegMaskCommon(int reg); + void AdjustSpillMask(); + void ClobberCalleeSave(); + void FlushReg(int reg); + void FlushRegWide(int reg1, int reg2); + void FreeCallTemps(); + void FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free); + void LockCallTemps(); + void MarkPreservedSingle(int v_reg, int reg); + void CompilerInitializeRegAlloc(); + + // Required for target - miscellaneous. + AssemblerStatus AssembleInstructions(uintptr_t start_addr); + void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); + void SetupTargetResourceMasks(LIR* lir); + const char* GetTargetInstFmt(int opcode); + const char* GetTargetInstName(int opcode); + std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); + uint64_t GetPCUseDefEncoding(); + uint64_t GetTargetInstFlags(int opcode); + int GetInsnSize(LIR* lir); + bool IsUnconditionalBranch(LIR* lir); + + // Required for target - Dalvik-level generators. 
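  // These lower Dalvik-level constructs (long arithmetic, array accesses,
  // switches, monitors, FP compares, method entry/exit) to x86 LIR; several of
  // them are implemented in the call_x86.cc portion of this change above.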
+ void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenArrayObjPut(int opt_flags, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale); + void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_dest, int scale); + void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale); + void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift); + void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2); + void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2); + void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); + bool GenInlinedCas32(CallInfo* info, bool need_write_barrier); + bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); + bool GenInlinedSqrt(CallInfo* info); + void GenNegLong(RegLocation rl_dest, RegLocation rl_src); + void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset, + ThrowKind kind); + RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div); + RegLocation GenDivRemLit(RegLocation rl_dest, int reg_lo, int lit, bool is_div); + void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenDivZeroCheck(int reg_lo, int reg_hi); + void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); + void GenExitSequence(); + void GenFillArrayData(uint32_t table_offset, RegLocation rl_src); + void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); + void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); + void GenSelect(BasicBlock* bb, MIR* mir); + void GenMemBarrier(MemBarrierKind barrier_kind); + void GenMonitorEnter(int opt_flags, RegLocation rl_src); + void GenMonitorExit(int opt_flags, RegLocation rl_src); + void GenMoveException(RegLocation rl_dest); + void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, + int lit, int first_bit, int second_bit); + void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); + void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); + void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenSpecialCase(BasicBlock* bb, MIR* mir, SpecialCaseHandler special_case); + + // Single operation generators. 
+ LIR* OpUnconditionalBranch(LIR* target); + LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target); + LIR* OpCmpImmBranch(ConditionCode cond, int reg, int check_value, LIR* target); + LIR* OpCondBranch(ConditionCode cc, LIR* target); + LIR* OpDecAndBranch(ConditionCode c_code, int reg, LIR* target); + LIR* OpFpRegCopy(int r_dest, int r_src); + LIR* OpIT(ConditionCode cond, const char* guide); + LIR* OpMem(OpKind op, int rBase, int disp); + LIR* OpPcRelLoad(int reg, LIR* target); + LIR* OpReg(OpKind op, int r_dest_src); + LIR* OpRegCopy(int r_dest, int r_src); + LIR* OpRegCopyNoInsert(int r_dest, int r_src); + LIR* OpRegImm(OpKind op, int r_dest_src1, int value); + LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset); + LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2); + LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value); + LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2); + LIR* OpTestSuspend(LIR* target); + LIR* OpThreadMem(OpKind op, int thread_offset); + LIR* OpVldm(int rBase, int count); + LIR* OpVstm(int rBase, int count); + void OpLea(int rBase, int reg1, int reg2, int scale, int offset); + void OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, int src_hi); + void OpTlsCmp(int offset, int val); + + void OpRegThreadMem(OpKind op, int r_dest, int thread_offset); + void SpillCoreRegs(); + void UnSpillCoreRegs(); + static const X86EncodingMap EncodingMap[kX86Last]; + bool InexpensiveConstantInt(int32_t value); + bool InexpensiveConstantFloat(int32_t value); + bool InexpensiveConstantLong(int64_t value); + bool InexpensiveConstantDouble(int64_t value); + + private: + void EmitDisp(int base, int disp); + void EmitOpReg(const X86EncodingMap* entry, uint8_t reg); + void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp); + void EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg); + void EmitRegMem(const X86EncodingMap* entry, uint8_t reg, uint8_t base, int disp); + void EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index, + int scale, int disp); + void EmitArrayReg(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp, + uint8_t reg); + void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp); + void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2); + void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm); + void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); + void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm); + void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); + void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); + void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl); + void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition); + void EmitJmp(const X86EncodingMap* entry, int rel); + void EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc); + void EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp); + void EmitCallThread(const X86EncodingMap* entry, int disp); + void EmitPcRel(const X86EncodingMap* entry, uint8_t reg, int base_or_table, uint8_t index, + int scale, int table_or_disp); + void EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset); + void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir); +}; + +} // namespace art + +#endif // ART_SRC_COMPILER_DEX_QUICK_X86_CODEGENX86_H_ diff --git a/compiler/dex/quick/x86/fp_x86.cc 
b/compiler/dex/quick/x86/fp_x86.cc new file mode 100644 index 0000000000..906b4cc759 --- /dev/null +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -0,0 +1,378 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "codegen_x86.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "x86_lir.h" + +namespace art { + +void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + X86OpCode op = kX86Nop; + RegLocation rl_result; + + /* + * Don't attempt to optimize register usage since these opcodes call out to + * the handlers. + */ + switch (opcode) { + case Instruction::ADD_FLOAT_2ADDR: + case Instruction::ADD_FLOAT: + op = kX86AddssRR; + break; + case Instruction::SUB_FLOAT_2ADDR: + case Instruction::SUB_FLOAT: + op = kX86SubssRR; + break; + case Instruction::DIV_FLOAT_2ADDR: + case Instruction::DIV_FLOAT: + op = kX86DivssRR; + break; + case Instruction::MUL_FLOAT_2ADDR: + case Instruction::MUL_FLOAT: + op = kX86MulssRR; + break; + case Instruction::REM_FLOAT_2ADDR: + case Instruction::REM_FLOAT: + FlushAllRegs(); // Send everything to home location + CallRuntimeHelperRegLocationRegLocation(ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2, false); + rl_result = GetReturn(true); + StoreValue(rl_dest, rl_result); + return; + case Instruction::NEG_FLOAT: + GenNegFloat(rl_dest, rl_src1); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + rl_src1 = LoadValue(rl_src1, kFPReg); + rl_src2 = LoadValue(rl_src2, kFPReg); + rl_result = EvalLoc(rl_dest, kFPReg, true); + int r_dest = rl_result.low_reg; + int r_src1 = rl_src1.low_reg; + int r_src2 = rl_src2.low_reg; + if (r_dest == r_src2) { + r_src2 = AllocTempFloat(); + OpRegCopy(r_src2, r_dest); + } + OpRegCopy(r_dest, r_src1); + NewLIR2(op, r_dest, r_src2); + StoreValue(rl_dest, rl_result); +} + +void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + X86OpCode op = kX86Nop; + RegLocation rl_result; + + switch (opcode) { + case Instruction::ADD_DOUBLE_2ADDR: + case Instruction::ADD_DOUBLE: + op = kX86AddsdRR; + break; + case Instruction::SUB_DOUBLE_2ADDR: + case Instruction::SUB_DOUBLE: + op = kX86SubsdRR; + break; + case Instruction::DIV_DOUBLE_2ADDR: + case Instruction::DIV_DOUBLE: + op = kX86DivsdRR; + break; + case Instruction::MUL_DOUBLE_2ADDR: + case Instruction::MUL_DOUBLE: + op = kX86MulsdRR; + break; + case Instruction::REM_DOUBLE_2ADDR: + case Instruction::REM_DOUBLE: + FlushAllRegs(); // Send everything to home location + CallRuntimeHelperRegLocationRegLocation(ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2, false); + rl_result = GetReturnWide(true); + StoreValueWide(rl_dest, rl_result); + return; + case Instruction::NEG_DOUBLE: + GenNegDouble(rl_dest, rl_src1); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + rl_src1 = LoadValueWide(rl_src1, kFPReg); + DCHECK(rl_src1.wide); + rl_src2 = 
LoadValueWide(rl_src2, kFPReg); + DCHECK(rl_src2.wide); + rl_result = EvalLoc(rl_dest, kFPReg, true); + DCHECK(rl_dest.wide); + DCHECK(rl_result.wide); + int r_dest = S2d(rl_result.low_reg, rl_result.high_reg); + int r_src1 = S2d(rl_src1.low_reg, rl_src1.high_reg); + int r_src2 = S2d(rl_src2.low_reg, rl_src2.high_reg); + if (r_dest == r_src2) { + r_src2 = AllocTempDouble() | X86_FP_DOUBLE; + OpRegCopy(r_src2, r_dest); + } + OpRegCopy(r_dest, r_src1); + NewLIR2(op, r_dest, r_src2); + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src) { + RegisterClass rcSrc = kFPReg; + X86OpCode op = kX86Nop; + int src_reg; + RegLocation rl_result; + switch (opcode) { + case Instruction::INT_TO_FLOAT: + rcSrc = kCoreReg; + op = kX86Cvtsi2ssRR; + break; + case Instruction::DOUBLE_TO_FLOAT: + rcSrc = kFPReg; + op = kX86Cvtsd2ssRR; + break; + case Instruction::FLOAT_TO_DOUBLE: + rcSrc = kFPReg; + op = kX86Cvtss2sdRR; + break; + case Instruction::INT_TO_DOUBLE: + rcSrc = kCoreReg; + op = kX86Cvtsi2sdRR; + break; + case Instruction::FLOAT_TO_INT: { + rl_src = LoadValue(rl_src, kFPReg); + src_reg = rl_src.low_reg; + // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + int temp_reg = AllocTempFloat(); + + LoadConstant(rl_result.low_reg, 0x7fffffff); + NewLIR2(kX86Cvtsi2ssRR, temp_reg, rl_result.low_reg); + NewLIR2(kX86ComissRR, src_reg, temp_reg); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); + NewLIR2(kX86Cvttss2siRR, rl_result.low_reg, src_reg); + LIR* branch_normal = NewLIR1(kX86Jmp8, 0); + branch_na_n->target = NewLIR0(kPseudoTargetLabel); + NewLIR2(kX86Xor32RR, rl_result.low_reg, rl_result.low_reg); + branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); + branch_normal->target = NewLIR0(kPseudoTargetLabel); + StoreValue(rl_dest, rl_result); + return; + } + case Instruction::DOUBLE_TO_INT: { + rl_src = LoadValueWide(rl_src, kFPReg); + src_reg = rl_src.low_reg; + // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + int temp_reg = AllocTempDouble() | X86_FP_DOUBLE; + + LoadConstant(rl_result.low_reg, 0x7fffffff); + NewLIR2(kX86Cvtsi2sdRR, temp_reg, rl_result.low_reg); + NewLIR2(kX86ComisdRR, src_reg, temp_reg); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); + NewLIR2(kX86Cvttsd2siRR, rl_result.low_reg, src_reg); + LIR* branch_normal = NewLIR1(kX86Jmp8, 0); + branch_na_n->target = NewLIR0(kPseudoTargetLabel); + NewLIR2(kX86Xor32RR, rl_result.low_reg, rl_result.low_reg); + branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); + branch_normal->target = NewLIR0(kPseudoTargetLabel); + StoreValue(rl_dest, rl_result); + return; + } + case Instruction::LONG_TO_DOUBLE: + GenConversionCall(ENTRYPOINT_OFFSET(pL2d), rl_dest, rl_src); + return; + case Instruction::LONG_TO_FLOAT: + // TODO: inline by using memory as a 64-bit source. Be careful about promoted registers. 
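+  // The TODO presumably refers to x87 fild: in 32-bit mode cvtsi2ss/sd only take a 32-bit
+  // integer, but fild can read a 64-bit operand straight from the vreg's home location in
+  // memory. That is only safe while the in-memory copy is current, which stops being
+  // guaranteed once the long has been promoted to registers - hence the runtime call for now.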
+ GenConversionCall(ENTRYPOINT_OFFSET(pL2f), rl_dest, rl_src); + return; + case Instruction::FLOAT_TO_LONG: + GenConversionCall(ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src); + return; + case Instruction::DOUBLE_TO_LONG: + GenConversionCall(ENTRYPOINT_OFFSET(pD2l), rl_dest, rl_src); + return; + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + } + if (rl_src.wide) { + rl_src = LoadValueWide(rl_src, rcSrc); + src_reg = S2d(rl_src.low_reg, rl_src.high_reg); + } else { + rl_src = LoadValue(rl_src, rcSrc); + src_reg = rl_src.low_reg; + } + if (rl_dest.wide) { + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(op, S2d(rl_result.low_reg, rl_result.high_reg), src_reg); + StoreValueWide(rl_dest, rl_result); + } else { + rl_result = EvalLoc(rl_dest, kFPReg, true); + NewLIR2(op, rl_result.low_reg, src_reg); + StoreValue(rl_dest, rl_result); + } +} + +void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) { + bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT); + bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT); + int src_reg1; + int src_reg2; + if (single) { + rl_src1 = LoadValue(rl_src1, kFPReg); + src_reg1 = rl_src1.low_reg; + rl_src2 = LoadValue(rl_src2, kFPReg); + src_reg2 = rl_src2.low_reg; + } else { + rl_src1 = LoadValueWide(rl_src1, kFPReg); + src_reg1 = S2d(rl_src1.low_reg, rl_src1.high_reg); + rl_src2 = LoadValueWide(rl_src2, kFPReg); + src_reg2 = S2d(rl_src2.low_reg, rl_src2.high_reg); + } + // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + LoadConstantNoClobber(rl_result.low_reg, unordered_gt ? 1 : 0); + if (single) { + NewLIR2(kX86UcomissRR, src_reg1, src_reg2); + } else { + NewLIR2(kX86UcomisdRR, src_reg1, src_reg2); + } + LIR* branch = NULL; + if (unordered_gt) { + branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); + } + // If the result reg can't be byte accessed, use a jump and move instead of a set.
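+  // In 32-bit mode only eax/ecx/edx/ebx (register numbers 0-3 here) expose an 8-bit low
+  // register, so setcc cannot write to esp/ebp/esi/edi (numbers 4 and up). For those, the
+  // flag is materialized with a conditional branch plus a 32-bit mov instead.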
+ if (rl_result.low_reg >= 4) { + LIR* branch2 = NULL; + if (unordered_gt) { + branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA); + NewLIR2(kX86Mov32RI, rl_result.low_reg, 0x0); + } else { + branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe); + NewLIR2(kX86Mov32RI, rl_result.low_reg, 0x1); + } + branch2->target = NewLIR0(kPseudoTargetLabel); + } else { + NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondA /* above - unsigned > */); + } + NewLIR2(kX86Sbb32RI, rl_result.low_reg, 0); + if (unordered_gt) { + branch->target = NewLIR0(kPseudoTargetLabel); + } + StoreValue(rl_dest, rl_result); +} + +void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, + bool is_double) { + LIR* taken = &block_label_list_[bb->taken->id]; + LIR* not_taken = &block_label_list_[bb->fall_through->id]; + LIR* branch = NULL; + RegLocation rl_src1; + RegLocation rl_src2; + if (is_double) { + rl_src1 = mir_graph_->GetSrcWide(mir, 0); + rl_src2 = mir_graph_->GetSrcWide(mir, 2); + rl_src1 = LoadValueWide(rl_src1, kFPReg); + rl_src2 = LoadValueWide(rl_src2, kFPReg); + NewLIR2(kX86UcomisdRR, S2d(rl_src1.low_reg, rl_src1.high_reg), + S2d(rl_src2.low_reg, rl_src2.high_reg)); + } else { + rl_src1 = mir_graph_->GetSrc(mir, 0); + rl_src2 = mir_graph_->GetSrc(mir, 1); + rl_src1 = LoadValue(rl_src1, kFPReg); + rl_src2 = LoadValue(rl_src2, kFPReg); + NewLIR2(kX86UcomissRR, rl_src1.low_reg, rl_src2.low_reg); + } + ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]); + switch (ccode) { + case kCondEq: + if (!gt_bias) { + branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); + branch->target = not_taken; + } + break; + case kCondNe: + if (!gt_bias) { + branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); + branch->target = taken; + } + break; + case kCondLt: + if (gt_bias) { + branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); + branch->target = not_taken; + } + ccode = kCondCs; + break; + case kCondLe: + if (gt_bias) { + branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); + branch->target = not_taken; + } + ccode = kCondLs; + break; + case kCondGt: + if (gt_bias) { + branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); + branch->target = taken; + } + ccode = kCondHi; + break; + case kCondGe: + if (gt_bias) { + branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); + branch->target = taken; + } + ccode = kCondCc; + break; + default: + LOG(FATAL) << "Unexpected ccode: " << ccode; + } + OpCondBranch(ccode, taken); +} + +void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) +{ + RegLocation rl_result; + rl_src = LoadValue(rl_src, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegRegImm(kOpAdd, rl_result.low_reg, rl_src.low_reg, 0x80000000); + StoreValue(rl_dest, rl_result); +} + +void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) +{ + RegLocation rl_result; + rl_src = LoadValueWide(rl_src, kCoreReg); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegRegImm(kOpAdd, rl_result.high_reg, rl_src.high_reg, 0x80000000); + OpRegCopy(rl_result.low_reg, rl_src.low_reg); + StoreValueWide(rl_dest, rl_result); +} + +bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) { + DCHECK_NE(cu_->instruction_set, kThumb2); + return false; +} + + + +} // namespace art diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc new file mode 100644 index 0000000000..97d9d2deed --- /dev/null +++ b/compiler/dex/quick/x86/int_x86.cc @@ -0,0 +1,606 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains codegen for the X86 ISA */ + +#include "codegen_x86.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "mirror/array.h" +#include "x86_lir.h" + +namespace art { + +/* + * Perform register memory operation. + */ +LIR* X86Mir2Lir::GenRegMemCheck(ConditionCode c_code, + int reg1, int base, int offset, ThrowKind kind) +{ + LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, + current_dalvik_offset_, reg1, base, offset); + OpRegMem(kOpCmp, reg1, base, offset); + LIR* branch = OpCondBranch(c_code, tgt); + // Remember branch target - will process later + throw_launchpads_.Insert(tgt); + return branch; +} + +/* + * Compare two 64-bit values + * x = y return 0 + * x < y return -1 + * x > y return 1 + */ +void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage + LoadValueDirectWideFixed(rl_src1, r0, r1); + LoadValueDirectWideFixed(rl_src2, r2, r3); + // Compute (r1:r0) = (r1:r0) - (r3:r2) + OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2 + OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF + NewLIR2(kX86Set8R, r2, kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0 + NewLIR2(kX86Movzx8RR, r2, r2); + OpReg(kOpNeg, r2); // r2 = -r2 + OpRegReg(kOpOr, r0, r1); // r0 = high | low - sets ZF + NewLIR2(kX86Set8R, r0, kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0 + NewLIR2(kX86Movzx8RR, r0, r0); + OpRegReg(kOpOr, r0, r2); // r0 = r0 | r2 + RegLocation rl_result = LocCReturn(); + StoreValue(rl_dest, rl_result); +} + +X86ConditionCode X86ConditionEncoding(ConditionCode cond) { + switch (cond) { + case kCondEq: return kX86CondEq; + case kCondNe: return kX86CondNe; + case kCondCs: return kX86CondC; + case kCondCc: return kX86CondNc; + case kCondMi: return kX86CondS; + case kCondPl: return kX86CondNs; + case kCondVs: return kX86CondO; + case kCondVc: return kX86CondNo; + case kCondHi: return kX86CondA; + case kCondLs: return kX86CondBe; + case kCondGe: return kX86CondGe; + case kCondLt: return kX86CondL; + case kCondGt: return kX86CondG; + case kCondLe: return kX86CondLe; + case kCondAl: + case kCondNv: LOG(FATAL) << "Should not reach here"; + } + return kX86CondO; +} + +LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, + LIR* target) +{ + NewLIR2(kX86Cmp32RR, src1, src2); + X86ConditionCode cc = X86ConditionEncoding(cond); + LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , + cc); + branch->target = target; + return branch; +} + +LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, + int check_value, LIR* target) +{ + if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) { + // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode + NewLIR2(kX86Test32RR, reg, reg); + } else { + NewLIR2(IS_SIMM8(check_value) ? 
kX86Cmp32RI8 : kX86Cmp32RI, reg, check_value); + } + X86ConditionCode cc = X86ConditionEncoding(cond); + LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc); + branch->target = target; + return branch; +} + +LIR* X86Mir2Lir::OpRegCopyNoInsert(int r_dest, int r_src) +{ + if (X86_FPREG(r_dest) || X86_FPREG(r_src)) + return OpFpRegCopy(r_dest, r_src); + LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR, + r_dest, r_src); + if (r_dest == r_src) { + res->flags.is_nop = true; + } + return res; +} + +LIR* X86Mir2Lir::OpRegCopy(int r_dest, int r_src) +{ + LIR *res = OpRegCopyNoInsert(r_dest, r_src); + AppendLIR(res); + return res; +} + +void X86Mir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, + int src_lo, int src_hi) +{ + bool dest_fp = X86_FPREG(dest_lo) && X86_FPREG(dest_hi); + bool src_fp = X86_FPREG(src_lo) && X86_FPREG(src_hi); + assert(X86_FPREG(src_lo) == X86_FPREG(src_hi)); + assert(X86_FPREG(dest_lo) == X86_FPREG(dest_hi)); + if (dest_fp) { + if (src_fp) { + OpRegCopy(S2d(dest_lo, dest_hi), S2d(src_lo, src_hi)); + } else { + // TODO: Prevent this from happening in the code. The result is often + // unused or could have been loaded more easily from memory. + NewLIR2(kX86MovdxrRR, dest_lo, src_lo); + NewLIR2(kX86MovdxrRR, dest_hi, src_hi); + NewLIR2(kX86PsllqRI, dest_hi, 32); + NewLIR2(kX86OrpsRR, dest_lo, dest_hi); + } + } else { + if (src_fp) { + NewLIR2(kX86MovdrxRR, dest_lo, src_lo); + NewLIR2(kX86PsrlqRI, src_lo, 32); + NewLIR2(kX86MovdrxRR, dest_hi, src_lo); + } else { + // Handle overlap + if (src_hi == dest_lo) { + OpRegCopy(dest_hi, src_hi); + OpRegCopy(dest_lo, src_lo); + } else { + OpRegCopy(dest_lo, src_lo); + OpRegCopy(dest_hi, src_hi); + } + } + } +} + +void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) +{ + UNIMPLEMENTED(FATAL) << "Need codegen for GenSelect"; +} + +void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { + LIR* taken = &block_label_list_[bb->taken->id]; + RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); + RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage + LoadValueDirectWideFixed(rl_src1, r0, r1); + LoadValueDirectWideFixed(rl_src2, r2, r3); + ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]); + // Swap operands and condition code to prevent use of zero flag. 
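+  // After the sub/sbb pair below, SF and OF reflect the full 64-bit signed comparison, but
+  // ZF only covers the high word. kCondLe/kCondGt would need a valid ZF, so the operands are
+  // swapped and the test is rewritten as kCondGe/kCondLt, which use SF/OF alone;
+  // kCondEq/kCondNe get a usable ZF by ORing the two result halves together.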
+ if (ccode == kCondLe || ccode == kCondGt) { + // Compute (r3:r2) = (r3:r2) - (r1:r0) + OpRegReg(kOpSub, r2, r0); // r2 = r2 - r0 + OpRegReg(kOpSbc, r3, r1); // r3 = r3 - r1 - CF + } else { + // Compute (r1:r0) = (r1:r0) - (r3:r2) + OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2 + OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF + } + switch (ccode) { + case kCondEq: + case kCondNe: + OpRegReg(kOpOr, r0, r1); // r0 = r0 | r1 + break; + case kCondLe: + ccode = kCondGe; + break; + case kCondGt: + ccode = kCondLt; + break; + case kCondLt: + case kCondGe: + break; + default: + LOG(FATAL) << "Unexpected ccode: " << ccode; + } + OpCondBranch(ccode, taken); +} + +RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, int reg_lo, + int lit, bool is_div) +{ + LOG(FATAL) << "Unexpected use of GenDivRemLit for x86"; + return rl_dest; +} + +RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, int reg_lo, + int reg_hi, bool is_div) +{ + LOG(FATAL) << "Unexpected use of GenDivRem for x86"; + return rl_dest; +} + +bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) +{ + DCHECK_EQ(cu_->instruction_set, kX86); + RegLocation rl_src1 = info->args[0]; + RegLocation rl_src2 = info->args[1]; + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src2 = LoadValue(rl_src2, kCoreReg); + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg); + DCHECK_EQ(cu_->instruction_set, kX86); + LIR* branch = NewLIR2(kX86Jcc8, 0, is_min ? kX86CondG : kX86CondL); + OpRegReg(kOpMov, rl_result.low_reg, rl_src1.low_reg); + LIR* branch2 = NewLIR1(kX86Jmp8, 0); + branch->target = NewLIR0(kPseudoTargetLabel); + OpRegReg(kOpMov, rl_result.low_reg, rl_src2.low_reg); + branch2->target = NewLIR0(kPseudoTargetLabel); + StoreValue(rl_dest, rl_result); + return true; +} + +void X86Mir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) +{ + NewLIR5(kX86Lea32RA, rBase, reg1, reg2, scale, offset); +} + +void X86Mir2Lir::OpTlsCmp(int offset, int val) +{ + NewLIR2(kX86Cmp16TI8, offset, val); +} + +bool X86Mir2Lir::GenInlinedCas32(CallInfo* info, bool need_write_barrier) { + DCHECK_NE(cu_->instruction_set, kThumb2); + return false; +} + +LIR* X86Mir2Lir::OpPcRelLoad(int reg, LIR* target) { + LOG(FATAL) << "Unexpected use of OpPcRelLoad for x86"; + return NULL; +} + +LIR* X86Mir2Lir::OpVldm(int rBase, int count) +{ + LOG(FATAL) << "Unexpected use of OpVldm for x86"; + return NULL; +} + +LIR* X86Mir2Lir::OpVstm(int rBase, int count) +{ + LOG(FATAL) << "Unexpected use of OpVstm for x86"; + return NULL; +} + +void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, + RegLocation rl_result, int lit, + int first_bit, int second_bit) +{ + int t_reg = AllocTemp(); + OpRegRegImm(kOpLsl, t_reg, rl_src.low_reg, second_bit - first_bit); + OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src.low_reg, t_reg); + FreeTemp(t_reg); + if (first_bit != 0) { + OpRegRegImm(kOpLsl, rl_result.low_reg, rl_result.low_reg, first_bit); + } +} + +void X86Mir2Lir::GenDivZeroCheck(int reg_lo, int reg_hi) +{ + int t_reg = AllocTemp(); + OpRegRegReg(kOpOr, t_reg, reg_lo, reg_hi); + GenImmedCheck(kCondEq, t_reg, 0, kThrowDivZero); + FreeTemp(t_reg); +} + +// Test suspend flag, return target of taken suspend branch +LIR* X86Mir2Lir::OpTestSuspend(LIR* target) +{ + OpTlsCmp(Thread::ThreadFlagsOffset().Int32Value(), 0); + return OpCondBranch((target == NULL) ? 
kCondNe : kCondEq, target); +} + +// Decrement register and branch on condition +LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) +{ + OpRegImm(kOpSub, reg, 1); + return OpCmpImmBranch(c_code, reg, 0, target); +} + +bool X86Mir2Lir::SmallLiteralDivide(Instruction::Code dalvik_opcode, + RegLocation rl_src, RegLocation rl_dest, int lit) +{ + LOG(FATAL) << "Unexpected use of SmallLiteralDivide in x86"; + return false; +} + +LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) +{ + LOG(FATAL) << "Unexpected use of OpIT in x86"; + return NULL; +} + +void X86Mir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + LOG(FATAL) << "Unexpected use of GenMulLong for x86"; +} +void X86Mir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart + // enough. + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage + LoadValueDirectWideFixed(rl_src1, r0, r1); + LoadValueDirectWideFixed(rl_src2, r2, r3); + // Compute (r1:r0) = (r1:r0) + (r2:r3) + OpRegReg(kOpAdd, r0, r2); // r0 = r0 + r2 + OpRegReg(kOpAdc, r1, r3); // r1 = r1 + r3 + CF + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + INVALID_SREG, INVALID_SREG}; + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart + // enough. + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage + LoadValueDirectWideFixed(rl_src1, r0, r1); + LoadValueDirectWideFixed(rl_src2, r2, r3); + // Compute (r1:r0) = (r1:r0) - (r2:r3) + OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2 + OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + INVALID_SREG, INVALID_SREG}; + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1, + RegLocation rl_src2) +{ + // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart + // enough. + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage + LoadValueDirectWideFixed(rl_src1, r0, r1); + LoadValueDirectWideFixed(rl_src2, r2, r3); + // Compute (r1:r0) = (r1:r0) & (r2:r3) + OpRegReg(kOpAnd, r0, r2); // r0 = r0 & r2 + OpRegReg(kOpAnd, r1, r3); // r1 = r1 & r3 + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + INVALID_SREG, INVALID_SREG}; + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::GenOrLong(RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) +{ + // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart + // enough. + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage + LoadValueDirectWideFixed(rl_src1, r0, r1); + LoadValueDirectWideFixed(rl_src2, r2, r3); + // Compute (r1:r0) = (r1:r0) | (r2:r3) + OpRegReg(kOpOr, r0, r2); // r0 = r0 | r2 + OpRegReg(kOpOr, r1, r3); // r1 = r1 | r3 + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + INVALID_SREG, INVALID_SREG}; + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::GenXorLong(RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_src2) +{ + // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart + // enough.
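+  // Only eax/ecx/edx/ebx are core temps (esi/edi/ebp are promotion targets and esp is
+  // reserved), and a two-operand 64-bit op needs all four, so the allocator state is
+  // flushed and the temps are claimed explicitly.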
+ FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage + LoadValueDirectWideFixed(rl_src1, r0, r1); + LoadValueDirectWideFixed(rl_src2, r2, r3); + // Compute (r1:r0) = (r1:r0) ^ (r2:r3) + OpRegReg(kOpXor, r0, r2); // r0 = r0 ^ r2 + OpRegReg(kOpXor, r1, r3); // r1 = r1 ^ r3 + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + INVALID_SREG, INVALID_SREG}; + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) +{ + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage + LoadValueDirectWideFixed(rl_src, r0, r1); + // Compute (r1:r0) = -(r1:r0) + OpRegReg(kOpNeg, r0, r0); // r0 = -r0 + OpRegImm(kOpAdc, r1, 0); // r1 = r1 + CF + OpRegReg(kOpNeg, r1, r1); // r1 = -r1 + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, + INVALID_SREG, INVALID_SREG}; + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::OpRegThreadMem(OpKind op, int r_dest, int thread_offset) { + X86OpCode opcode = kX86Bkpt; + switch (op) { + case kOpCmp: opcode = kX86Cmp32RT; break; + case kOpMov: opcode = kX86Mov32RT; break; + default: + LOG(FATAL) << "Bad opcode: " << op; + break; + } + NewLIR2(opcode, r_dest, thread_offset); +} + +/* + * Generate array load + */ +void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_dest, int scale) +{ + RegisterClass reg_class = oat_reg_class_by_size(size); + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset; + RegLocation rl_result; + rl_array = LoadValue(rl_array, kCoreReg); + rl_index = LoadValue(rl_index, kCoreReg); + + if (size == kLong || size == kDouble) { + data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); + } else { + data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); + } + + /* null object? */ + GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); + + if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { + /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */ + GenRegMemCheck(kCondUge, rl_index.low_reg, rl_array.low_reg, + len_offset, kThrowArrayBounds); + } + if ((size == kLong) || (size == kDouble)) { + int reg_addr = AllocTemp(); + OpLea(reg_addr, rl_array.low_reg, rl_index.low_reg, scale, data_offset); + FreeTemp(rl_array.low_reg); + FreeTemp(rl_index.low_reg); + rl_result = EvalLoc(rl_dest, reg_class, true); + LoadBaseIndexedDisp(reg_addr, INVALID_REG, 0, 0, rl_result.low_reg, + rl_result.high_reg, size, INVALID_SREG); + StoreValueWide(rl_dest, rl_result); + } else { + rl_result = EvalLoc(rl_dest, reg_class, true); + + LoadBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, + data_offset, rl_result.low_reg, INVALID_REG, size, + INVALID_SREG); + + StoreValue(rl_dest, rl_result); + } +} + +/* + * Generate array store + * + */ +void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale) +{ + RegisterClass reg_class = oat_reg_class_by_size(size); + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset; + + if (size == kLong || size == kDouble) { + data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); + } else { + data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); + } + + rl_array = LoadValue(rl_array, kCoreReg); + rl_index = LoadValue(rl_index, kCoreReg); + + /* null object? 
*/ + GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); + + if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { + /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */ + GenRegMemCheck(kCondUge, rl_index.low_reg, rl_array.low_reg, len_offset, kThrowArrayBounds); + } + if ((size == kLong) || (size == kDouble)) { + rl_src = LoadValueWide(rl_src, reg_class); + } else { + rl_src = LoadValue(rl_src, reg_class); + } + // If the src reg can't be byte accessed, move it to a temp first. + if ((size == kSignedByte || size == kUnsignedByte) && rl_src.low_reg >= 4) { + int temp = AllocTemp(); + OpRegCopy(temp, rl_src.low_reg); + StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, temp, + INVALID_REG, size, INVALID_SREG); + } else { + StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg, + rl_src.high_reg, size, INVALID_SREG); + } +} + +/* + * Generate array store + * + */ +void X86Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, + RegLocation rl_index, RegLocation rl_src, int scale) +{ + int len_offset = mirror::Array::LengthOffset().Int32Value(); + int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(); + + FlushAllRegs(); // Use explicit registers + LockCallTemps(); + + int r_value = TargetReg(kArg0); // Register holding value + int r_array_class = TargetReg(kArg1); // Register holding array's Class + int r_array = TargetReg(kArg2); // Register holding array + int r_index = TargetReg(kArg3); // Register holding index into array + + LoadValueDirectFixed(rl_array, r_array); // Grab array + LoadValueDirectFixed(rl_src, r_value); // Grab value + LoadValueDirectFixed(rl_index, r_index); // Grab index + + GenNullCheck(rl_array.s_reg_low, r_array, opt_flags); // NPE? + + // Store of null? + LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL); + + // Get the array's class. + LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class); + CallRuntimeHelperRegReg(ENTRYPOINT_OFFSET(pCanPutArrayElementFromCode), r_value, + r_array_class, true); + // Redo LoadValues in case they didn't survive the call. + LoadValueDirectFixed(rl_array, r_array); // Reload array + LoadValueDirectFixed(rl_index, r_index); // Reload index + LoadValueDirectFixed(rl_src, r_value); // Reload value + r_array_class = INVALID_REG; + + // Branch here if value to be stored == null + LIR* target = NewLIR0(kPseudoTargetLabel); + null_value_check->target = target; + + // make an extra temp available for card mark below + FreeTemp(TargetReg(kArg1)); + if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { + /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */ + GenRegMemCheck(kCondUge, r_index, r_array, len_offset, kThrowArrayBounds); + } + StoreBaseIndexedDisp(r_array, r_index, scale, + data_offset, r_value, INVALID_REG, kWord, INVALID_SREG); + FreeTemp(r_index); + if (!mir_graph_->IsConstantNullRef(rl_src)) { + MarkGCCard(r_value, r_array); + } +} + +void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift) +{ + // Default implementation is just to ignore the constant case. + GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift); +} + +void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, + RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) +{ + // Default - bail to non-const handler. 
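+  // The ALU instructions do have imm8/imm32 forms that could consume the constant directly,
+  // but that specialization is not done yet; the generic path below just loads rl_src2 into
+  // a register pair like any non-constant operand.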
+ GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); +} + +} // namespace art diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc new file mode 100644 index 0000000000..c421ef3f11 --- /dev/null +++ b/compiler/dex/quick/x86/target_x86.cc @@ -0,0 +1,571 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "codegen_x86.h" +#include "dex/compiler_internals.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "x86_lir.h" + +#include <string> + +namespace art { + +//FIXME: restore "static" when usage uncovered +/*static*/ int core_regs[] = { + rAX, rCX, rDX, rBX, rX86_SP, rBP, rSI, rDI +#ifdef TARGET_REX_SUPPORT + r8, r9, r10, r11, r12, r13, r14, 15 +#endif +}; +/*static*/ int ReservedRegs[] = {rX86_SP}; +/*static*/ int core_temps[] = {rAX, rCX, rDX, rBX}; +/*static*/ int FpRegs[] = { + fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7, +#ifdef TARGET_REX_SUPPORT + fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15 +#endif +}; +/*static*/ int fp_temps[] = { + fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7, +#ifdef TARGET_REX_SUPPORT + fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15 +#endif +}; + +RegLocation X86Mir2Lir::LocCReturn() +{ + RegLocation res = X86_LOC_C_RETURN; + return res; +} + +RegLocation X86Mir2Lir::LocCReturnWide() +{ + RegLocation res = X86_LOC_C_RETURN_WIDE; + return res; +} + +RegLocation X86Mir2Lir::LocCReturnFloat() +{ + RegLocation res = X86_LOC_C_RETURN_FLOAT; + return res; +} + +RegLocation X86Mir2Lir::LocCReturnDouble() +{ + RegLocation res = X86_LOC_C_RETURN_DOUBLE; + return res; +} + +// Return a target-dependent special register. +int X86Mir2Lir::TargetReg(SpecialTargetRegister reg) { + int res = INVALID_REG; + switch (reg) { + case kSelf: res = rX86_SELF; break; + case kSuspend: res = rX86_SUSPEND; break; + case kLr: res = rX86_LR; break; + case kPc: res = rX86_PC; break; + case kSp: res = rX86_SP; break; + case kArg0: res = rX86_ARG0; break; + case kArg1: res = rX86_ARG1; break; + case kArg2: res = rX86_ARG2; break; + case kArg3: res = rX86_ARG3; break; + case kFArg0: res = rX86_FARG0; break; + case kFArg1: res = rX86_FARG1; break; + case kFArg2: res = rX86_FARG2; break; + case kFArg3: res = rX86_FARG3; break; + case kRet0: res = rX86_RET0; break; + case kRet1: res = rX86_RET1; break; + case kInvokeTgt: res = rX86_INVOKE_TGT; break; + case kCount: res = rX86_COUNT; break; + } + return res; +} + +// Create a double from a pair of singles. +int X86Mir2Lir::S2d(int low_reg, int high_reg) +{ + return X86_S2D(low_reg, high_reg); +} + +// Return mask to strip off fp reg flags and bias. +uint32_t X86Mir2Lir::FpRegMask() +{ + return X86_FP_REG_MASK; +} + +// True if both regs single, both core or both double. +bool X86Mir2Lir::SameRegType(int reg1, int reg2) +{ + return (X86_REGTYPE(reg1) == X86_REGTYPE(reg2)); +} + +/* + * Decode the register id. 
+ */ +uint64_t X86Mir2Lir::GetRegMaskCommon(int reg) +{ + uint64_t seed; + int shift; + int reg_id; + + reg_id = reg & 0xf; + /* Double registers in x86 are just a single FP register */ + seed = 1; + /* FP register starts at bit position 16 */ + shift = X86_FPREG(reg) ? kX86FPReg0 : 0; + /* Expand the double register id into single offset */ + shift += reg_id; + return (seed << shift); +} + +uint64_t X86Mir2Lir::GetPCUseDefEncoding() +{ + /* + * FIXME: might make sense to use a virtual resource encoding bit for pc. Might be + * able to clean up some of the x86/Arm_Mips differences + */ + LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86"; + return 0ULL; +} + +void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir) +{ + DCHECK_EQ(cu_->instruction_set, kX86); + + // X86-specific resource map setup here. + uint64_t flags = X86Mir2Lir::EncodingMap[lir->opcode].flags; + + if (flags & REG_USE_SP) { + lir->use_mask |= ENCODE_X86_REG_SP; + } + + if (flags & REG_DEF_SP) { + lir->def_mask |= ENCODE_X86_REG_SP; + } + + if (flags & REG_DEFA) { + SetupRegMask(&lir->def_mask, rAX); + } + + if (flags & REG_DEFD) { + SetupRegMask(&lir->def_mask, rDX); + } + if (flags & REG_USEA) { + SetupRegMask(&lir->use_mask, rAX); + } + + if (flags & REG_USEC) { + SetupRegMask(&lir->use_mask, rCX); + } + + if (flags & REG_USED) { + SetupRegMask(&lir->use_mask, rDX); + } +} + +/* For dumping instructions */ +static const char* x86RegName[] = { + "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" +}; + +static const char* x86CondName[] = { + "O", + "NO", + "B/NAE/C", + "NB/AE/NC", + "Z/EQ", + "NZ/NE", + "BE/NA", + "NBE/A", + "S", + "NS", + "P/PE", + "NP/PO", + "L/NGE", + "NL/GE", + "LE/NG", + "NLE/G" +}; + +/* + * Interpret a format string and build a string no longer than size + * See format key in Assemble.cc. + */ +std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) { + std::string buf; + size_t i = 0; + size_t fmt_len = strlen(fmt); + while (i < fmt_len) { + if (fmt[i] != '!') { + buf += fmt[i]; + i++; + } else { + i++; + DCHECK_LT(i, fmt_len); + char operand_number_ch = fmt[i]; + i++; + if (operand_number_ch == '!') { + buf += "!"; + } else { + int operand_number = operand_number_ch - '0'; + DCHECK_LT(operand_number, 6); // Expect upto 6 LIR operands. 
+ DCHECK_LT(i, fmt_len); + int operand = lir->operands[operand_number]; + switch (fmt[i]) { + case 'c': + DCHECK_LT(static_cast<size_t>(operand), sizeof(x86CondName)); + buf += x86CondName[operand]; + break; + case 'd': + buf += StringPrintf("%d", operand); + break; + case 'p': { + SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(operand); + buf += StringPrintf("0x%08x", tab_rec->offset); + break; + } + case 'r': + if (X86_FPREG(operand) || X86_DOUBLEREG(operand)) { + int fp_reg = operand & X86_FP_REG_MASK; + buf += StringPrintf("xmm%d", fp_reg); + } else { + DCHECK_LT(static_cast<size_t>(operand), sizeof(x86RegName)); + buf += x86RegName[operand]; + } + break; + case 't': + buf += StringPrintf("0x%08x (L%p)", + reinterpret_cast<uint32_t>(base_addr) + + lir->offset + operand, lir->target); + break; + default: + buf += StringPrintf("DecodeError '%c'", fmt[i]); + break; + } + i++; + } + } + } + return buf; +} + +void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) +{ + char buf[256]; + buf[0] = 0; + + if (mask == ENCODE_ALL) { + strcpy(buf, "all"); + } else { + char num[8]; + int i; + + for (i = 0; i < kX86RegEnd; i++) { + if (mask & (1ULL << i)) { + sprintf(num, "%d ", i); + strcat(buf, num); + } + } + + if (mask & ENCODE_CCODE) { + strcat(buf, "cc "); + } + /* Memory bits */ + if (x86LIR && (mask & ENCODE_DALVIK_REG)) { + sprintf(buf + strlen(buf), "dr%d%s", x86LIR->alias_info & 0xffff, + (x86LIR->alias_info & 0x80000000) ? "(+1)" : ""); + } + if (mask & ENCODE_LITERAL) { + strcat(buf, "lit "); + } + + if (mask & ENCODE_HEAP_REF) { + strcat(buf, "heap "); + } + if (mask & ENCODE_MUST_NOT_ALIAS) { + strcat(buf, "noalias "); + } + } + if (buf[0]) { + LOG(INFO) << prefix << ": " << buf; + } +} + +void X86Mir2Lir::AdjustSpillMask() { + // Adjustment for LR spilling, x86 has no LR so nothing to do here + core_spill_mask_ |= (1 << rRET); + num_core_spills_++; +} + +/* + * Mark a callee-save fp register as promoted. Note that + * vpush/vpop uses contiguous register lists so we must + * include any holes in the mask. Associate holes with + * Dalvik register INVALID_VREG (0xFFFFU). + */ +void X86Mir2Lir::MarkPreservedSingle(int v_reg, int reg) +{ + UNIMPLEMENTED(WARNING) << "MarkPreservedSingle"; +#if 0 + LOG(FATAL) << "No support yet for promoted FP regs"; +#endif +} + +void X86Mir2Lir::FlushRegWide(int reg1, int reg2) +{ + RegisterInfo* info1 = GetRegInfo(reg1); + RegisterInfo* info2 = GetRegInfo(reg2); + DCHECK(info1 && info2 && info1->pair && info2->pair && + (info1->partner == info2->reg) && + (info2->partner == info1->reg)); + if ((info1->live && info1->dirty) || (info2->live && info2->dirty)) { + if (!(info1->is_temp && info2->is_temp)) { + /* Should not happen. 
If it does, there's a problem in eval_loc */ + LOG(FATAL) << "Long half-temp, half-promoted"; + } + + info1->dirty = false; + info2->dirty = false; + if (mir_graph_->SRegToVReg(info2->s_reg) < mir_graph_->SRegToVReg(info1->s_reg)) + info1 = info2; + int v_reg = mir_graph_->SRegToVReg(info1->s_reg); + StoreBaseDispWide(rX86_SP, VRegOffset(v_reg), info1->reg, info1->partner); + } +} + +void X86Mir2Lir::FlushReg(int reg) +{ + RegisterInfo* info = GetRegInfo(reg); + if (info->live && info->dirty) { + info->dirty = false; + int v_reg = mir_graph_->SRegToVReg(info->s_reg); + StoreBaseDisp(rX86_SP, VRegOffset(v_reg), reg, kWord); + } +} + +/* Give access to the target-dependent FP register encoding to common code */ +bool X86Mir2Lir::IsFpReg(int reg) { + return X86_FPREG(reg); +} + +/* Clobber all regs that might be used by an external C call */ +void X86Mir2Lir::ClobberCalleeSave() +{ + Clobber(rAX); + Clobber(rCX); + Clobber(rDX); +} + +RegLocation X86Mir2Lir::GetReturnWideAlt() { + RegLocation res = LocCReturnWide(); + CHECK(res.low_reg == rAX); + CHECK(res.high_reg == rDX); + Clobber(rAX); + Clobber(rDX); + MarkInUse(rAX); + MarkInUse(rDX); + MarkPair(res.low_reg, res.high_reg); + return res; +} + +RegLocation X86Mir2Lir::GetReturnAlt() +{ + RegLocation res = LocCReturn(); + res.low_reg = rDX; + Clobber(rDX); + MarkInUse(rDX); + return res; +} + +X86Mir2Lir::RegisterInfo* X86Mir2Lir::GetRegInfo(int reg) +{ + return X86_FPREG(reg) ? ®_pool_->FPRegs[reg & X86_FP_REG_MASK] + : ®_pool_->core_regs[reg]; +} + +/* To be used when explicitly managing register use */ +void X86Mir2Lir::LockCallTemps() +{ + LockTemp(rX86_ARG0); + LockTemp(rX86_ARG1); + LockTemp(rX86_ARG2); + LockTemp(rX86_ARG3); +} + +/* To be used when explicitly managing register use */ +void X86Mir2Lir::FreeCallTemps() +{ + FreeTemp(rX86_ARG0); + FreeTemp(rX86_ARG1); + FreeTemp(rX86_ARG2); + FreeTemp(rX86_ARG3); +} + +void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) +{ +#if ANDROID_SMP != 0 + // TODO: optimize fences + NewLIR0(kX86Mfence); +#endif +} +/* + * Alloc a pair of core registers, or a double. Low reg in low byte, + * high reg in next byte. 
+ */ +int X86Mir2Lir::AllocTypedTempPair(bool fp_hint, + int reg_class) +{ + int high_reg; + int low_reg; + int res = 0; + + if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { + low_reg = AllocTempDouble(); + high_reg = low_reg + 1; + res = (low_reg & 0xff) | ((high_reg & 0xff) << 8); + return res; + } + + low_reg = AllocTemp(); + high_reg = AllocTemp(); + res = (low_reg & 0xff) | ((high_reg & 0xff) << 8); + return res; +} + +int X86Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) { + if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { + return AllocTempFloat(); + } + return AllocTemp(); +} + +void X86Mir2Lir::CompilerInitializeRegAlloc() { + int num_regs = sizeof(core_regs)/sizeof(*core_regs); + int num_reserved = sizeof(ReservedRegs)/sizeof(*ReservedRegs); + int num_temps = sizeof(core_temps)/sizeof(*core_temps); + int num_fp_regs = sizeof(FpRegs)/sizeof(*FpRegs); + int num_fp_temps = sizeof(fp_temps)/sizeof(*fp_temps); + reg_pool_ = static_cast<RegisterPool*>(arena_->NewMem(sizeof(*reg_pool_), true, + ArenaAllocator::kAllocRegAlloc)); + reg_pool_->num_core_regs = num_regs; + reg_pool_->core_regs = + static_cast<RegisterInfo*>(arena_->NewMem(num_regs * sizeof(*reg_pool_->core_regs), true, + ArenaAllocator::kAllocRegAlloc)); + reg_pool_->num_fp_regs = num_fp_regs; + reg_pool_->FPRegs = + static_cast<RegisterInfo *>(arena_->NewMem(num_fp_regs * sizeof(*reg_pool_->FPRegs), true, + ArenaAllocator::kAllocRegAlloc)); + CompilerInitPool(reg_pool_->core_regs, core_regs, reg_pool_->num_core_regs); + CompilerInitPool(reg_pool_->FPRegs, FpRegs, reg_pool_->num_fp_regs); + // Keep special registers from being allocated + for (int i = 0; i < num_reserved; i++) { + MarkInUse(ReservedRegs[i]); + } + // Mark temp regs - all others not in use can be used for promotion + for (int i = 0; i < num_temps; i++) { + MarkTemp(core_temps[i]); + } + for (int i = 0; i < num_fp_temps; i++) { + MarkTemp(fp_temps[i]); + } +} + +void X86Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, + RegLocation rl_free) +{ + if ((rl_free.low_reg != rl_keep.low_reg) && (rl_free.low_reg != rl_keep.high_reg) && + (rl_free.high_reg != rl_keep.low_reg) && (rl_free.high_reg != rl_keep.high_reg)) { + // No overlap, free both + FreeTemp(rl_free.low_reg); + FreeTemp(rl_free.high_reg); + } +} + +void X86Mir2Lir::SpillCoreRegs() { + if (num_core_spills_ == 0) { + return; + } + // Spill mask not including fake return address register + uint32_t mask = core_spill_mask_ & ~(1 << rRET); + int offset = frame_size_ - (4 * num_core_spills_); + for (int reg = 0; mask; mask >>= 1, reg++) { + if (mask & 0x1) { + StoreWordDisp(rX86_SP, offset, reg); + offset += 4; + } + } +} + +void X86Mir2Lir::UnSpillCoreRegs() { + if (num_core_spills_ == 0) { + return; + } + // Spill mask not including fake return address register + uint32_t mask = core_spill_mask_ & ~(1 << rRET); + int offset = frame_size_ - (4 * num_core_spills_); + for (int reg = 0; mask; mask >>= 1, reg++) { + if (mask & 0x1) { + LoadWordDisp(rX86_SP, offset, reg); + offset += 4; + } + } +} + +bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) +{ + return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32); +} + +X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) + : Mir2Lir(cu, mir_graph, arena) { + for (int i = 0; i < kX86Last; i++) { + if (X86Mir2Lir::EncodingMap[i].opcode != i) { + LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name + << " is wrong: expecting " << i << ", seeing " + << 
static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode); + } + } +} + +Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, + ArenaAllocator* const arena) { + return new X86Mir2Lir(cu, mir_graph, arena); +} + +// Not used in x86 +int X86Mir2Lir::LoadHelper(int offset) +{ + LOG(FATAL) << "Unexpected use of LoadHelper in x86"; + return INVALID_REG; +} + +uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) +{ + return X86Mir2Lir::EncodingMap[opcode].flags; +} + +const char* X86Mir2Lir::GetTargetInstName(int opcode) +{ + return X86Mir2Lir::EncodingMap[opcode].name; +} + +const char* X86Mir2Lir::GetTargetInstFmt(int opcode) +{ + return X86Mir2Lir::EncodingMap[opcode].fmt; +} + +} // namespace art diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc new file mode 100644 index 0000000000..fb07ff1e22 --- /dev/null +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -0,0 +1,582 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "codegen_x86.h" +#include "dex/quick/mir_to_lir-inl.h" +#include "x86_lir.h" + +namespace art { + +/* This file contains codegen for the X86 ISA */ + +LIR* X86Mir2Lir::OpFpRegCopy(int r_dest, int r_src) +{ + int opcode; + /* must be both DOUBLE or both not DOUBLE */ + DCHECK_EQ(X86_DOUBLEREG(r_dest), X86_DOUBLEREG(r_src)); + if (X86_DOUBLEREG(r_dest)) { + opcode = kX86MovsdRR; + } else { + if (X86_SINGLEREG(r_dest)) { + if (X86_SINGLEREG(r_src)) { + opcode = kX86MovssRR; + } else { // Fpr <- Gpr + opcode = kX86MovdxrRR; + } + } else { // Gpr <- Fpr + DCHECK(X86_SINGLEREG(r_src)); + opcode = kX86MovdrxRR; + } + } + DCHECK_NE((EncodingMap[opcode].flags & IS_BINARY_OP), 0ULL); + LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest, r_src); + if (r_dest == r_src) { + res->flags.is_nop = true; + } + return res; +} + +bool X86Mir2Lir::InexpensiveConstantInt(int32_t value) +{ + return true; +} + +bool X86Mir2Lir::InexpensiveConstantFloat(int32_t value) +{ + return false; +} + +bool X86Mir2Lir::InexpensiveConstantLong(int64_t value) +{ + return true; +} + +bool X86Mir2Lir::InexpensiveConstantDouble(int64_t value) +{ + return false; // TUNING +} + +/* + * Load a immediate using a shortcut if possible; otherwise + * grab from the per-translation literal pool. If target is + * a high register, build constant into a low register and copy. + * + * No additional register clobbering operation performed. Use this version when + * 1) r_dest is freshly returned from AllocTemp or + * 2) The codegen is under fixed register usage + */ +LIR* X86Mir2Lir::LoadConstantNoClobber(int r_dest, int value) +{ + int r_dest_save = r_dest; + if (X86_FPREG(r_dest)) { + if (value == 0) { + return NewLIR2(kX86XorpsRR, r_dest, r_dest); + } + DCHECK(X86_SINGLEREG(r_dest)); + r_dest = AllocTemp(); + } + + LIR *res; + if (value == 0) { + res = NewLIR2(kX86Xor32RR, r_dest, r_dest); + } else { + // Note, there is no byte immediate form of a 32 bit immediate move. 
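+  // Unlike the ALU group, mov r32, imm32 has no sign-extended imm8 encoding, so the full
+  // 32-bit immediate is always emitted.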
+ res = NewLIR2(kX86Mov32RI, r_dest, value); + } + + if (X86_FPREG(r_dest_save)) { + NewLIR2(kX86MovdxrRR, r_dest_save, r_dest); + FreeTemp(r_dest); + } + + return res; +} + +LIR* X86Mir2Lir::OpUnconditionalBranch(LIR* target) +{ + LIR* res = NewLIR1(kX86Jmp8, 0 /* offset to be patched during assembly*/ ); + res->target = target; + return res; +} + +LIR* X86Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) +{ + LIR* branch = NewLIR2(kX86Jcc8, 0 /* offset to be patched */, + X86ConditionEncoding(cc)); + branch->target = target; + return branch; +} + +LIR* X86Mir2Lir::OpReg(OpKind op, int r_dest_src) +{ + X86OpCode opcode = kX86Bkpt; + switch (op) { + case kOpNeg: opcode = kX86Neg32R; break; + case kOpNot: opcode = kX86Not32R; break; + case kOpBlx: opcode = kX86CallR; break; + default: + LOG(FATAL) << "Bad case in OpReg " << op; + } + return NewLIR1(opcode, r_dest_src); +} + +LIR* X86Mir2Lir::OpRegImm(OpKind op, int r_dest_src1, int value) +{ + X86OpCode opcode = kX86Bkpt; + bool byte_imm = IS_SIMM8(value); + DCHECK(!X86_FPREG(r_dest_src1)); + switch (op) { + case kOpLsl: opcode = kX86Sal32RI; break; + case kOpLsr: opcode = kX86Shr32RI; break; + case kOpAsr: opcode = kX86Sar32RI; break; + case kOpAdd: opcode = byte_imm ? kX86Add32RI8 : kX86Add32RI; break; + case kOpOr: opcode = byte_imm ? kX86Or32RI8 : kX86Or32RI; break; + case kOpAdc: opcode = byte_imm ? kX86Adc32RI8 : kX86Adc32RI; break; + //case kOpSbb: opcode = kX86Sbb32RI; break; + case kOpAnd: opcode = byte_imm ? kX86And32RI8 : kX86And32RI; break; + case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break; + case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break; + case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break; + case kOpMov: return LoadConstantNoClobber(r_dest_src1, value); + case kOpMul: + opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI; + return NewLIR3(opcode, r_dest_src1, r_dest_src1, value); + default: + LOG(FATAL) << "Bad case in OpRegImm " << op; + } + return NewLIR2(opcode, r_dest_src1, value); +} + +LIR* X86Mir2Lir::OpRegReg(OpKind op, int r_dest_src1, int r_src2) +{ + X86OpCode opcode = kX86Nop; + bool src2_must_be_cx = false; + switch (op) { + // X86 unary opcodes + case kOpMvn: + OpRegCopy(r_dest_src1, r_src2); + return OpReg(kOpNot, r_dest_src1); + case kOpNeg: + OpRegCopy(r_dest_src1, r_src2); + return OpReg(kOpNeg, r_dest_src1); + // X86 binary opcodes + case kOpSub: opcode = kX86Sub32RR; break; + case kOpSbc: opcode = kX86Sbb32RR; break; + case kOpLsl: opcode = kX86Sal32RC; src2_must_be_cx = true; break; + case kOpLsr: opcode = kX86Shr32RC; src2_must_be_cx = true; break; + case kOpAsr: opcode = kX86Sar32RC; src2_must_be_cx = true; break; + case kOpMov: opcode = kX86Mov32RR; break; + case kOpCmp: opcode = kX86Cmp32RR; break; + case kOpAdd: opcode = kX86Add32RR; break; + case kOpAdc: opcode = kX86Adc32RR; break; + case kOpAnd: opcode = kX86And32RR; break; + case kOpOr: opcode = kX86Or32RR; break; + case kOpXor: opcode = kX86Xor32RR; break; + case kOp2Byte: + // Use shifts instead of a byte operand if the source can't be byte accessed. 
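+      // movsx r32, r8 needs a byte-addressable source (eax/ecx/edx/ebx); for a source
+      // numbered 4 or above, the value is copied and then sign-extended with shl 24 / sar 24.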
+ if (r_src2 >= 4) { + NewLIR2(kX86Mov32RR, r_dest_src1, r_src2); + NewLIR2(kX86Sal32RI, r_dest_src1, 24); + return NewLIR2(kX86Sar32RI, r_dest_src1, 24); + } else { + opcode = kX86Movsx8RR; + } + break; + case kOp2Short: opcode = kX86Movsx16RR; break; + case kOp2Char: opcode = kX86Movzx16RR; break; + case kOpMul: opcode = kX86Imul32RR; break; + default: + LOG(FATAL) << "Bad case in OpRegReg " << op; + break; + } + CHECK(!src2_must_be_cx || r_src2 == rCX); + return NewLIR2(opcode, r_dest_src1, r_src2); +} + +LIR* X86Mir2Lir::OpRegMem(OpKind op, int r_dest, int rBase, + int offset) +{ + X86OpCode opcode = kX86Nop; + switch (op) { + // X86 binary opcodes + case kOpSub: opcode = kX86Sub32RM; break; + case kOpMov: opcode = kX86Mov32RM; break; + case kOpCmp: opcode = kX86Cmp32RM; break; + case kOpAdd: opcode = kX86Add32RM; break; + case kOpAnd: opcode = kX86And32RM; break; + case kOpOr: opcode = kX86Or32RM; break; + case kOpXor: opcode = kX86Xor32RM; break; + case kOp2Byte: opcode = kX86Movsx8RM; break; + case kOp2Short: opcode = kX86Movsx16RM; break; + case kOp2Char: opcode = kX86Movzx16RM; break; + case kOpMul: + default: + LOG(FATAL) << "Bad case in OpRegMem " << op; + break; + } + return NewLIR3(opcode, r_dest, rBase, offset); +} + +LIR* X86Mir2Lir::OpRegRegReg(OpKind op, int r_dest, int r_src1, + int r_src2) +{ + if (r_dest != r_src1 && r_dest != r_src2) { + if (op == kOpAdd) { // lea special case, except can't encode rbp as base + if (r_src1 == r_src2) { + OpRegCopy(r_dest, r_src1); + return OpRegImm(kOpLsl, r_dest, 1); + } else if (r_src1 != rBP) { + return NewLIR5(kX86Lea32RA, r_dest, r_src1 /* base */, + r_src2 /* index */, 0 /* scale */, 0 /* disp */); + } else { + return NewLIR5(kX86Lea32RA, r_dest, r_src2 /* base */, + r_src1 /* index */, 0 /* scale */, 0 /* disp */); + } + } else { + OpRegCopy(r_dest, r_src1); + return OpRegReg(op, r_dest, r_src2); + } + } else if (r_dest == r_src1) { + return OpRegReg(op, r_dest, r_src2); + } else { // r_dest == r_src2 + switch (op) { + case kOpSub: // non-commutative + OpReg(kOpNeg, r_dest); + op = kOpAdd; + break; + case kOpSbc: + case kOpLsl: case kOpLsr: case kOpAsr: case kOpRor: { + int t_reg = AllocTemp(); + OpRegCopy(t_reg, r_src1); + OpRegReg(op, t_reg, r_src2); + LIR* res = OpRegCopy(r_dest, t_reg); + FreeTemp(t_reg); + return res; + } + case kOpAdd: // commutative + case kOpOr: + case kOpAdc: + case kOpAnd: + case kOpXor: + break; + default: + LOG(FATAL) << "Bad case in OpRegRegReg " << op; + } + return OpRegReg(op, r_dest, r_src1); + } +} + +LIR* X86Mir2Lir::OpRegRegImm(OpKind op, int r_dest, int r_src, + int value) +{ + if (op == kOpMul) { + X86OpCode opcode = IS_SIMM8(value) ? 
kX86Imul32RRI8 : kX86Imul32RRI; + return NewLIR3(opcode, r_dest, r_src, value); + } else if (op == kOpAnd) { + if (value == 0xFF && r_src < 4) { + return NewLIR2(kX86Movzx8RR, r_dest, r_src); + } else if (value == 0xFFFF) { + return NewLIR2(kX86Movzx16RR, r_dest, r_src); + } + } + if (r_dest != r_src) { + if (false && op == kOpLsl && value >= 0 && value <= 3) { // lea shift special case + // TODO: fix bug in LEA encoding when disp == 0 + return NewLIR5(kX86Lea32RA, r_dest, r5sib_no_base /* base */, + r_src /* index */, value /* scale */, 0 /* disp */); + } else if (op == kOpAdd) { // lea add special case + return NewLIR5(kX86Lea32RA, r_dest, r_src /* base */, + r4sib_no_index /* index */, 0 /* scale */, value /* disp */); + } + OpRegCopy(r_dest, r_src); + } + return OpRegImm(op, r_dest, value); +} + +LIR* X86Mir2Lir::OpThreadMem(OpKind op, int thread_offset) +{ + X86OpCode opcode = kX86Bkpt; + switch (op) { + case kOpBlx: opcode = kX86CallT; break; + default: + LOG(FATAL) << "Bad opcode: " << op; + break; + } + return NewLIR1(opcode, thread_offset); +} + +LIR* X86Mir2Lir::OpMem(OpKind op, int rBase, int disp) +{ + X86OpCode opcode = kX86Bkpt; + switch (op) { + case kOpBlx: opcode = kX86CallM; break; + default: + LOG(FATAL) << "Bad opcode: " << op; + break; + } + return NewLIR2(opcode, rBase, disp); +} + +LIR* X86Mir2Lir::LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value) +{ + int32_t val_lo = Low32Bits(value); + int32_t val_hi = High32Bits(value); + LIR *res; + if (X86_FPREG(r_dest_lo)) { + DCHECK(X86_FPREG(r_dest_hi)); // ignore r_dest_hi + if (value == 0) { + return NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo); + } else { + if (val_lo == 0) { + res = NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo); + } else { + res = LoadConstantNoClobber(r_dest_lo, val_lo); + } + if (val_hi != 0) { + LoadConstantNoClobber(r_dest_hi, val_hi); + NewLIR2(kX86PsllqRI, r_dest_hi, 32); + NewLIR2(kX86OrpsRR, r_dest_lo, r_dest_hi); + } + } + } else { + res = LoadConstantNoClobber(r_dest_lo, val_lo); + LoadConstantNoClobber(r_dest_hi, val_hi); + } + return res; +} + +LIR* X86Mir2Lir::LoadBaseIndexedDisp(int rBase, int r_index, int scale, + int displacement, int r_dest, int r_dest_hi, OpSize size, + int s_reg) { + LIR *load = NULL; + LIR *load2 = NULL; + bool is_array = r_index != INVALID_REG; + bool pair = false; + bool is64bit = false; + X86OpCode opcode = kX86Nop; + switch (size) { + case kLong: + case kDouble: + is64bit = true; + if (X86_FPREG(r_dest)) { + opcode = is_array ? kX86MovsdRA : kX86MovsdRM; + if (X86_SINGLEREG(r_dest)) { + DCHECK(X86_FPREG(r_dest_hi)); + DCHECK_EQ(r_dest, (r_dest_hi - 1)); + r_dest = S2d(r_dest, r_dest_hi); + } + r_dest_hi = r_dest + 1; + } else { + pair = true; + opcode = is_array ? kX86Mov32RA : kX86Mov32RM; + } + // TODO: double store is to unaligned address + DCHECK_EQ((displacement & 0x3), 0); + break; + case kWord: + case kSingle: + opcode = is_array ? kX86Mov32RA : kX86Mov32RM; + if (X86_FPREG(r_dest)) { + opcode = is_array ? kX86MovssRA : kX86MovssRM; + DCHECK(X86_SINGLEREG(r_dest)); + } + DCHECK_EQ((displacement & 0x3), 0); + break; + case kUnsignedHalf: + opcode = is_array ? kX86Movzx16RA : kX86Movzx16RM; + DCHECK_EQ((displacement & 0x1), 0); + break; + case kSignedHalf: + opcode = is_array ? kX86Movsx16RA : kX86Movsx16RM; + DCHECK_EQ((displacement & 0x1), 0); + break; + case kUnsignedByte: + opcode = is_array ? kX86Movzx8RA : kX86Movzx8RM; + break; + case kSignedByte: + opcode = is_array ? 
kX86Movsx8RA : kX86Movsx8RM; + break; + default: + LOG(FATAL) << "Bad case in LoadBaseIndexedDispBody"; + } + + if (!is_array) { + if (!pair) { + load = NewLIR3(opcode, r_dest, rBase, displacement + LOWORD_OFFSET); + } else { + if (rBase == r_dest) { + load2 = NewLIR3(opcode, r_dest_hi, rBase, + displacement + HIWORD_OFFSET); + load = NewLIR3(opcode, r_dest, rBase, displacement + LOWORD_OFFSET); + } else { + load = NewLIR3(opcode, r_dest, rBase, displacement + LOWORD_OFFSET); + load2 = NewLIR3(opcode, r_dest_hi, rBase, + displacement + HIWORD_OFFSET); + } + } + if (rBase == rX86_SP) { + AnnotateDalvikRegAccess(load, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, + true /* is_load */, is64bit); + if (pair) { + AnnotateDalvikRegAccess(load2, (displacement + HIWORD_OFFSET) >> 2, + true /* is_load */, is64bit); + } + } + } else { + if (!pair) { + load = NewLIR5(opcode, r_dest, rBase, r_index, scale, + displacement + LOWORD_OFFSET); + } else { + if (rBase == r_dest) { + load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale, + displacement + HIWORD_OFFSET); + load = NewLIR5(opcode, r_dest, rBase, r_index, scale, + displacement + LOWORD_OFFSET); + } else { + load = NewLIR5(opcode, r_dest, rBase, r_index, scale, + displacement + LOWORD_OFFSET); + load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale, + displacement + HIWORD_OFFSET); + } + } + } + + return load; +} + +/* Load value from base + scaled index. */ +LIR* X86Mir2Lir::LoadBaseIndexed(int rBase, + int r_index, int r_dest, int scale, OpSize size) { + return LoadBaseIndexedDisp(rBase, r_index, scale, 0, + r_dest, INVALID_REG, size, INVALID_SREG); +} + +LIR* X86Mir2Lir::LoadBaseDisp(int rBase, int displacement, + int r_dest, OpSize size, int s_reg) { + return LoadBaseIndexedDisp(rBase, INVALID_REG, 0, displacement, + r_dest, INVALID_REG, size, s_reg); +} + +LIR* X86Mir2Lir::LoadBaseDispWide(int rBase, int displacement, + int r_dest_lo, int r_dest_hi, int s_reg) { + return LoadBaseIndexedDisp(rBase, INVALID_REG, 0, displacement, + r_dest_lo, r_dest_hi, kLong, s_reg); +} + +LIR* X86Mir2Lir::StoreBaseIndexedDisp(int rBase, int r_index, int scale, + int displacement, int r_src, int r_src_hi, OpSize size, + int s_reg) { + LIR *store = NULL; + LIR *store2 = NULL; + bool is_array = r_index != INVALID_REG; + bool pair = false; + bool is64bit = false; + X86OpCode opcode = kX86Nop; + switch (size) { + case kLong: + case kDouble: + is64bit = true; + if (X86_FPREG(r_src)) { + opcode = is_array ? kX86MovsdAR : kX86MovsdMR; + if (X86_SINGLEREG(r_src)) { + DCHECK(X86_FPREG(r_src_hi)); + DCHECK_EQ(r_src, (r_src_hi - 1)); + r_src = S2d(r_src, r_src_hi); + } + r_src_hi = r_src + 1; + } else { + pair = true; + opcode = is_array ? kX86Mov32AR : kX86Mov32MR; + } + // TODO: double store is to unaligned address + DCHECK_EQ((displacement & 0x3), 0); + break; + case kWord: + case kSingle: + opcode = is_array ? kX86Mov32AR : kX86Mov32MR; + if (X86_FPREG(r_src)) { + opcode = is_array ? kX86MovssAR : kX86MovssMR; + DCHECK(X86_SINGLEREG(r_src)); + } + DCHECK_EQ((displacement & 0x3), 0); + break; + case kUnsignedHalf: + case kSignedHalf: + opcode = is_array ? kX86Mov16AR : kX86Mov16MR; + DCHECK_EQ((displacement & 0x1), 0); + break; + case kUnsignedByte: + case kSignedByte: + opcode = is_array ? 
kX86Mov8AR : kX86Mov8MR; + break; + default: + LOG(FATAL) << "Bad case in LoadBaseIndexedDispBody"; + } + + if (!is_array) { + if (!pair) { + store = NewLIR3(opcode, rBase, displacement + LOWORD_OFFSET, r_src); + } else { + store = NewLIR3(opcode, rBase, displacement + LOWORD_OFFSET, r_src); + store2 = NewLIR3(opcode, rBase, displacement + HIWORD_OFFSET, r_src_hi); + } + if (rBase == rX86_SP) { + AnnotateDalvikRegAccess(store, (displacement + (pair ? LOWORD_OFFSET : 0)) >> 2, + false /* is_load */, is64bit); + if (pair) { + AnnotateDalvikRegAccess(store2, (displacement + HIWORD_OFFSET) >> 2, + false /* is_load */, is64bit); + } + } + } else { + if (!pair) { + store = NewLIR5(opcode, rBase, r_index, scale, + displacement + LOWORD_OFFSET, r_src); + } else { + store = NewLIR5(opcode, rBase, r_index, scale, + displacement + LOWORD_OFFSET, r_src); + store2 = NewLIR5(opcode, rBase, r_index, scale, + displacement + HIWORD_OFFSET, r_src_hi); + } + } + + return store; +} + +/* store value base base + scaled index. */ +LIR* X86Mir2Lir::StoreBaseIndexed(int rBase, int r_index, int r_src, + int scale, OpSize size) +{ + return StoreBaseIndexedDisp(rBase, r_index, scale, 0, + r_src, INVALID_REG, size, INVALID_SREG); +} + +LIR* X86Mir2Lir::StoreBaseDisp(int rBase, int displacement, + int r_src, OpSize size) +{ + return StoreBaseIndexedDisp(rBase, INVALID_REG, 0, + displacement, r_src, INVALID_REG, size, + INVALID_SREG); +} + +LIR* X86Mir2Lir::StoreBaseDispWide(int rBase, int displacement, + int r_src_lo, int r_src_hi) +{ + return StoreBaseIndexedDisp(rBase, INVALID_REG, 0, displacement, + r_src_lo, r_src_hi, kLong, INVALID_SREG); +} + +} // namespace art diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h new file mode 100644 index 0000000000..600bd03026 --- /dev/null +++ b/compiler/dex/quick/x86/x86_lir.h @@ -0,0 +1,442 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_SRC_COMPILER_DEX_QUICK_X86_X86LIR_H_ +#define ART_SRC_COMPILER_DEX_QUICK_X86_X86LIR_H_ + +#include "dex/compiler_internals.h" + +namespace art { + +/* + * Runtime register conventions. We consider both x86, x86-64 and x32 (32bit mode x86-64), although + * we currently only target x86. The ABI has different conventions and we hope to have a single + * convention to simplify code generation. Changing something that is callee save and making it + * caller save places a burden on up-calls to save/restore the callee save register, however, there + * are few registers that are callee save in the ABI. Changing something that is caller save and + * making it callee save places a burden on down-calls to save/restore the callee save register. + * For these reasons we aim to match native conventions for caller and callee save. The first 4 + * registers can be used for byte operations, for this reason they are preferred for temporary + * scratch registers. 
+ *
+ * General Purpose Register:
+ *  Native: x86 | x86-64 / x32 | ART
+ *  r0/eax: caller save | caller save | caller, Method*, scratch, return value
+ *  r1/ecx: caller save | caller save, arg4 | caller, arg1, scratch
+ *  r2/edx: caller save | caller save, arg3 | caller, arg2, scratch, high half of long return
+ *  r3/ebx: callEE save | callEE save | callER, arg3, scratch
+ *  r4/esp: stack pointer
+ *  r5/ebp: callee save | callee save | callee, available for dalvik register promotion
+ *  r6/esi: callEE save | callER save, arg2 | callee, available for dalvik register promotion
+ *  r7/edi: callEE save | callER save, arg1 | callee, available for dalvik register promotion
+ *  --- x86-64/x32 registers
+ *  Native: x86-64 / x32 | ART
+ *  r8: caller save, arg5 | caller, scratch
+ *  r9: caller save, arg6 | caller, scratch
+ *  r10: caller save | caller, scratch
+ *  r11: caller save | caller, scratch
+ *  r12: callee save | callee, available for dalvik register promotion
+ *  r13: callee save | callee, available for dalvik register promotion
+ *  r14: callee save | callee, available for dalvik register promotion
+ *  r15: callee save | callee, available for dalvik register promotion
+ *
+ * There is no rSELF; instead, on x86 fs: has a base address of Thread::Current, whereas on
+ * x86-64/x32 gs: holds it.
+ *
+ * For floating point we don't support CPUs without SSE2 support (i.e. anything newer than the PIII):
+ *  Native: x86 | x86-64 / x32 | ART
+ *  XMM0: caller save |caller save, arg1 | caller, float/double return value (except for native x86 code)
+ *  XMM1: caller save |caller save, arg2 | caller, scratch
+ *  XMM2: caller save |caller save, arg3 | caller, scratch
+ *  XMM3: caller save |caller save, arg4 | caller, scratch
+ *  XMM4: caller save |caller save, arg5 | caller, scratch
+ *  XMM5: caller save |caller save, arg6 | caller, scratch
+ *  XMM6: caller save |caller save, arg7 | caller, scratch
+ *  XMM7: caller save |caller save, arg8 | caller, scratch
+ *  --- x86-64/x32 registers
+ *  XMM8 .. 15: caller save
+ *
+ * X87 is a necessary evil outside of ART code:
+ *  ST0: x86 float/double native return value, caller save
+ *  ST1 .. ST7: caller save
+ *
+ * Stack frame diagram (stack grows down, higher addresses at top):
+ *
+ * +------------------------+
+ * | IN[ins-1] | {Note: resides in caller's frame}
+ * | . |
+ * | IN[0] |
+ * | caller's Method* |
+ * +========================+ {Note: start of callee's frame}
+ * | return address | {pushed by call}
+ * | spill region | {variable sized}
+ * +------------------------+
+ * | ...filler word... | {Note: used as 2nd word of V[locals-1] if long}
+ * +------------------------+
+ * | V[locals-1] |
+ * | V[locals-2] |
+ * | . |
+ * | . |
+ * | V[1] |
+ * | V[0] |
+ * +------------------------+
+ * | 0 to 3 words padding |
+ * +------------------------+
+ * | OUT[outs-1] |
+ * | OUT[outs-2] |
+ * | . |
+ * | OUT[0] |
+ * | cur_method* | <<== sp w/ 16-byte alignment
+ * +========================+
+ */
+
+// Offset to distinguish FP regs.
+#define X86_FP_REG_OFFSET 32
+// Offset to distinguish DP FP regs.
+#define X86_FP_DOUBLE (X86_FP_REG_OFFSET + 16)
+// Offset to distinguish the extra regs.
+#define X86_EXTRA_REG_OFFSET (X86_FP_DOUBLE + 16)
+// Reg types.
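+// The macros below test those flag bits: X86_REGTYPE keeps only the type bits (0 for a core
+// reg, X86_FP_REG_OFFSET for a single-precision FP reg, X86_FP_DOUBLE for a double), while
+// X86_FP_REG_MASK further below strips the flags to recover the raw register index.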
+#define X86_REGTYPE(x) (x & (X86_FP_REG_OFFSET | X86_FP_DOUBLE)) +#define X86_FPREG(x) ((x & X86_FP_REG_OFFSET) == X86_FP_REG_OFFSET) +#define X86_EXTRAREG(x) ((x & X86_EXTRA_REG_OFFSET) == X86_EXTRA_REG_OFFSET) +#define X86_DOUBLEREG(x) ((x & X86_FP_DOUBLE) == X86_FP_DOUBLE) +#define X86_SINGLEREG(x) (X86_FPREG(x) && !X86_DOUBLEREG(x)) + +/* + * Note: the low register of a floating point pair is sufficient to + * create the name of a double, but require both names to be passed to + * allow for asserts to verify that the pair is consecutive if significant + * rework is done in this area. Also, it is a good reminder in the calling + * code that reg locations always describe doubles as a pair of singles. + */ +#define X86_S2D(x,y) ((x) | X86_FP_DOUBLE) +/* Mask to strip off fp flags */ +#define X86_FP_REG_MASK 0xF + +// RegisterLocation templates return values (rAX, rAX/rDX or XMM0). +// location, wide, defined, const, fp, core, ref, high_word, home, low_reg, high_reg, s_reg_low +#define X86_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rAX, INVALID_REG, INVALID_SREG, INVALID_SREG} +#define X86_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rAX, rDX, INVALID_SREG, INVALID_SREG} +#define X86_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, fr0, INVALID_REG, INVALID_SREG, INVALID_SREG} +#define X86_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, fr0, fr1, INVALID_SREG, INVALID_SREG} + +enum X86ResourceEncodingPos { + kX86GPReg0 = 0, + kX86RegSP = 4, + kX86FPReg0 = 16, // xmm0 .. xmm7/xmm15. + kX86FPRegEnd = 32, + kX86RegEnd = kX86FPRegEnd, +}; + +#define ENCODE_X86_REG_LIST(N) (static_cast<uint64_t>(N)) +#define ENCODE_X86_REG_SP (1ULL << kX86RegSP) + +enum X86NativeRegisterPool { + r0 = 0, + rAX = r0, + r1 = 1, + rCX = r1, + r2 = 2, + rDX = r2, + r3 = 3, + rBX = r3, + r4sp = 4, + rX86_SP = r4sp, + r4sib_no_index = r4sp, + r5 = 5, + rBP = r5, + r5sib_no_base = r5, + r6 = 6, + rSI = r6, + r7 = 7, + rDI = r7, +#ifndef TARGET_REX_SUPPORT + rRET = 8, // fake return address register for core spill mask. +#else + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, + rRET = 16, // fake return address register for core spill mask. +#endif + fr0 = 0 + X86_FP_REG_OFFSET, + fr1 = 1 + X86_FP_REG_OFFSET, + fr2 = 2 + X86_FP_REG_OFFSET, + fr3 = 3 + X86_FP_REG_OFFSET, + fr4 = 4 + X86_FP_REG_OFFSET, + fr5 = 5 + X86_FP_REG_OFFSET, + fr6 = 6 + X86_FP_REG_OFFSET, + fr7 = 7 + X86_FP_REG_OFFSET, + fr8 = 8 + X86_FP_REG_OFFSET, + fr9 = 9 + X86_FP_REG_OFFSET, + fr10 = 10 + X86_FP_REG_OFFSET, + fr11 = 11 + X86_FP_REG_OFFSET, + fr12 = 12 + X86_FP_REG_OFFSET, + fr13 = 13 + X86_FP_REG_OFFSET, + fr14 = 14 + X86_FP_REG_OFFSET, + fr15 = 15 + X86_FP_REG_OFFSET, +}; + +#define rX86_ARG0 rAX +#define rX86_ARG1 rCX +#define rX86_ARG2 rDX +#define rX86_ARG3 rBX +#define rX86_FARG0 rAX +#define rX86_FARG1 rCX +#define rX86_FARG2 rDX +#define rX86_FARG3 rBX +#define rX86_RET0 rAX +#define rX86_RET1 rDX +#define rX86_INVOKE_TGT rAX +#define rX86_LR INVALID_REG +#define rX86_SUSPEND INVALID_REG +#define rX86_SELF INVALID_REG +#define rX86_COUNT rCX +#define rX86_PC INVALID_REG + +/* + * The following enum defines the list of supported X86 instructions by the + * assembler. Their corresponding EncodingMap positions will be defined in + * Assemble.cc. + */ +enum X86OpCode { + kX86First = 0, + kX8632BitData = kX86First, // data [31..0]. 
+ kX86Bkpt, + kX86Nop, + // Define groups of binary operations + // MR - Memory Register - opcode [base + disp], reg + // - lir operands - 0: base, 1: disp, 2: reg + // AR - Array Register - opcode [base + index * scale + disp], reg + // - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg + // TR - Thread Register - opcode fs:[disp], reg - where fs: is equal to Thread::Current() + // - lir operands - 0: disp, 1: reg + // RR - Register Register - opcode reg1, reg2 + // - lir operands - 0: reg1, 1: reg2 + // RM - Register Memory - opcode reg, [base + disp] + // - lir operands - 0: reg, 1: base, 2: disp + // RA - Register Array - opcode reg, [base + index * scale + disp] + // - lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp + // RT - Register Thread - opcode reg, fs:[disp] - where fs: is equal to Thread::Current() + // - lir operands - 0: reg, 1: disp + // RI - Register Immediate - opcode reg, #immediate + // - lir operands - 0: reg, 1: immediate + // MI - Memory Immediate - opcode [base + disp], #immediate + // - lir operands - 0: base, 1: disp, 2: immediate + // AI - Array Immediate - opcode [base + index * scale + disp], #immediate + // - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate + // TI - Thread Register - opcode fs:[disp], imm - where fs: is equal to Thread::Current() + // - lir operands - 0: disp, 1: imm +#define BinaryOpCode(opcode) \ + opcode ## 8MR, opcode ## 8AR, opcode ## 8TR, \ + opcode ## 8RR, opcode ## 8RM, opcode ## 8RA, opcode ## 8RT, \ + opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, opcode ## 8TI, \ + opcode ## 16MR, opcode ## 16AR, opcode ## 16TR, \ + opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \ + opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \ + opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \ + opcode ## 32MR, opcode ## 32AR, opcode ## 32TR, \ + opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \ + opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \ + opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8 + BinaryOpCode(kX86Add), + BinaryOpCode(kX86Or), + BinaryOpCode(kX86Adc), + BinaryOpCode(kX86Sbb), + BinaryOpCode(kX86And), + BinaryOpCode(kX86Sub), + BinaryOpCode(kX86Xor), + BinaryOpCode(kX86Cmp), +#undef BinaryOpCode + kX86Imul16RRI, kX86Imul16RMI, kX86Imul16RAI, + kX86Imul32RRI, kX86Imul32RMI, kX86Imul32RAI, + kX86Imul32RRI8, kX86Imul32RMI8, kX86Imul32RAI8, + kX86Mov8MR, kX86Mov8AR, kX86Mov8TR, + kX86Mov8RR, kX86Mov8RM, kX86Mov8RA, kX86Mov8RT, + kX86Mov8RI, kX86Mov8MI, kX86Mov8AI, kX86Mov8TI, + kX86Mov16MR, kX86Mov16AR, kX86Mov16TR, + kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT, + kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI, + kX86Mov32MR, kX86Mov32AR, kX86Mov32TR, + kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT, + kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, + kX86Lea32RA, + // RC - Register CL - opcode reg, CL + // - lir operands - 0: reg, 1: CL + // MC - Memory CL - opcode [base + disp], CL + // - lir operands - 0: base, 1: disp, 2: CL + // AC - Array CL - opcode [base + index * scale + disp], CL + // - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: CL +#define BinaryShiftOpCode(opcode) \ + opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, \ + opcode ## 8RC, opcode ## 8MC, opcode ## 8AC, \ + opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, \ + opcode ## 16RC, opcode ## 16MC, opcode ## 16AC, \ + opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, \ + opcode ## 32RC, opcode ## 32MC, opcode ## 32AC + 
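+  // For example, BinaryShiftOpCode(kX86Sal) expands via token pasting into the 18
+  // enumerators kX86Sal8RI .. kX86Sal32AC: immediate (RI/MI/AI) and CL-count (RC/MC/AC)
+  // forms at 8-, 16- and 32-bit operand widths.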
+ BinaryShiftOpCode(kX86Rol),
+ BinaryShiftOpCode(kX86Ror),
+ BinaryShiftOpCode(kX86Rcl),
+ BinaryShiftOpCode(kX86Rcr),
+ BinaryShiftOpCode(kX86Sal),
+ BinaryShiftOpCode(kX86Shr),
+ BinaryShiftOpCode(kX86Sar),
+#undef BinaryShiftOpCode
+ kX86Cmc,
+#define UnaryOpcode(opcode, reg, mem, array) \
+ opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
+ opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
+ opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array
+ UnaryOpcode(kX86Test, RI, MI, AI),
+ kX86Test32RR,
+ UnaryOpcode(kX86Not, R, M, A),
+ UnaryOpcode(kX86Neg, R, M, A),
+ UnaryOpcode(kX86Mul, DaR, DaM, DaA),
+ UnaryOpcode(kX86Imul, DaR, DaM, DaA),
+ UnaryOpcode(kX86Divmod, DaR, DaM, DaA),
+ UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
+#undef UnaryOpcode
+#define Binary0fOpCode(opcode) \
+ opcode ## RR, opcode ## RM, opcode ## RA
+ Binary0fOpCode(kX86Movsd),
+ kX86MovsdMR,
+ kX86MovsdAR,
+ Binary0fOpCode(kX86Movss),
+ kX86MovssMR,
+ kX86MovssAR,
+ Binary0fOpCode(kX86Cvtsi2sd), // int to double
+ Binary0fOpCode(kX86Cvtsi2ss), // int to float
+ Binary0fOpCode(kX86Cvttsd2si), // truncating double to int
+ Binary0fOpCode(kX86Cvttss2si), // truncating float to int
+ Binary0fOpCode(kX86Cvtsd2si), // rounding double to int
+ Binary0fOpCode(kX86Cvtss2si), // rounding float to int
+ Binary0fOpCode(kX86Ucomisd), // unordered double compare
+ Binary0fOpCode(kX86Ucomiss), // unordered float compare
+ Binary0fOpCode(kX86Comisd), // double compare
+ Binary0fOpCode(kX86Comiss), // float compare
+ Binary0fOpCode(kX86Orps), // or of floating point registers
+ Binary0fOpCode(kX86Xorps), // xor of floating point registers
+ Binary0fOpCode(kX86Addsd), // double add
+ Binary0fOpCode(kX86Addss), // float add
+ Binary0fOpCode(kX86Mulsd), // double multiply
+ Binary0fOpCode(kX86Mulss), // float multiply
+ Binary0fOpCode(kX86Cvtsd2ss), // double to float
+ Binary0fOpCode(kX86Cvtss2sd), // float to double
+ Binary0fOpCode(kX86Subsd), // double subtract
+ Binary0fOpCode(kX86Subss), // float subtract
+ Binary0fOpCode(kX86Divsd), // double divide
+ Binary0fOpCode(kX86Divss), // float divide
+ kX86PsrlqRI, // right shift of floating point registers
+ kX86PsllqRI, // left shift of floating point registers
+ Binary0fOpCode(kX86Movdxr), // move into xmm from gpr
+ kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm
+ kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand
+ kX86Mfence, // memory barrier
+ Binary0fOpCode(kX86Imul16), // 16bit multiply
+ Binary0fOpCode(kX86Imul32), // 32bit multiply
+ kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR, // compare and exchange
+ kX86LockCmpxchgRR, kX86LockCmpxchgMR, kX86LockCmpxchgAR, // locked compare and exchange
+ Binary0fOpCode(kX86Movzx8), // zero-extend 8-bit value
+ Binary0fOpCode(kX86Movzx16), // zero-extend 16-bit value
+ Binary0fOpCode(kX86Movsx8), // sign-extend 8-bit value
+ Binary0fOpCode(kX86Movsx16), // sign-extend 16-bit value
+#undef Binary0fOpCode
+ kX86Jcc8, kX86Jcc32, // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned
+ kX86Jmp8, kX86Jmp32, // jmp rel8/32; lir operands - 0: rel, target assigned
+ kX86JmpR, // jmp reg; lir operands - 0: reg
+ kX86CallR, // call reg; lir operands - 0: reg
+ kX86CallM, // call [base + disp]; lir operands - 0: base, 1: disp
+ kX86CallA, // call [base + index * scale + disp]
+ // lir operands - 0: base, 1: index, 2: scale, 3: disp
+ kX86CallT, // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
+ kX86Ret, // ret; no lir operands
+ kX86StartOfMethod, // call 0; pop reg; sub reg, # - generate start of method into reg
+ // lir operands - 0: reg
+ kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement]
+ // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
+ kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table
+ kX86Last
+};
+
+/* Instruction assembly field_loc kind */
+enum X86EncodingKind {
+ kData, // Special case for raw data.
+ kNop, // Special case for variable length nop.
+ kNullary, // Opcode that takes no arguments.
+ kReg, kMem, kArray, // R, M and A instruction kinds.
+ kMemReg, kArrayReg, kThreadReg, // MR, AR and TR instruction kinds.
+ kRegReg, kRegMem, kRegArray, kRegThread, // RR, RM, RA and RT instruction kinds.
+ kRegRegStore, // RR following the store modrm reg-reg encoding rather than the load.
+ kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds.
+ kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds.
+ kMovRegImm, // Shorter form move RI.
+ kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate.
+ kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL.
+ kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds.
+ kRegCond, kMemCond, kArrayCond, // R, M, A instruction kinds followed by a condition.
+ kJmp, kJcc, kCall, // Branch instruction kinds.
+ kPcRel, // Operation with displacement that is PC relative.
+ kMacro, // An instruction composing multiple others.
+ kUnimplemented // Encoding used when an instruction isn't yet implemented.
+};
+
+/* Struct used to define the EncodingMap positions for each X86 opcode */
+struct X86EncodingMap {
+ X86OpCode opcode; // e.g. kX86Add32RI
+ X86EncodingKind kind; // Used to discriminate in the union below
+ uint64_t flags;
+ struct {
+ uint8_t prefix1; // non-zero => a prefix byte
+ uint8_t prefix2; // non-zero => a second prefix byte
+ uint8_t opcode; // 1 byte opcode
+ uint8_t extra_opcode1; // possible extra opcode byte
+ uint8_t extra_opcode2; // possible second extra opcode byte
+ // 3bit opcode that gets encoded in the register bits of the modrm byte, use determined by the
+ // encoding kind
+ uint8_t modrm_opcode;
+ uint8_t ax_opcode; // non-zero => shorter encoding for AX as a destination
+ uint8_t immediate_bytes; // number of bytes of immediate
+ } skeleton;
+ const char *name;
+ const char* fmt;
+};
+
+
+// FIXME: mem barrier type - what do we do for x86?
+#define kSY 0
+#define kST 0
+
+// Offsets of high and low halves of a 64bit value.
+#define LOWORD_OFFSET 0
+#define HIWORD_OFFSET 4
+
+// Segment override instruction prefix used for quick TLS access to Thread::Current().
+#define THREAD_PREFIX 0x64
+
+#define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
+#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
+
+extern X86EncodingMap EncodingMap[kX86Last];
+extern X86ConditionCode X86ConditionEncoding(ConditionCode cond);
+
+} // namespace art
+
+#endif // ART_SRC_COMPILER_DEX_QUICK_X86_X86LIR_H_
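
To see how the IS_SIMM8 test above feeds opcode selection: X86Mir2Lir::OpRegImm in utility_x86.cc (earlier in this change) picks the sign-extended imm8 encodings such as kX86Add32RI8 when the constant fits in a signed byte, and the full imm32 encodings such as kX86Add32RI otherwise. Below is a minimal standalone sketch of that decision; IsSimm8 and PickAddOpcode are illustrative stand-ins, not code from this patch.

#include <cstdint>
#include <cstdio>

// Mirrors the IS_SIMM8 test from x86_lir.h.
static bool IsSimm8(int32_t v) { return v >= -128 && v <= 127; }

// Illustrative helper: chooses between the sign-extended imm8 form and the
// full imm32 form of ADD, the same way OpRegImm chooses kX86Add32RI8 vs kX86Add32RI.
static const char* PickAddOpcode(int32_t imm) {
  return IsSimm8(imm) ? "kX86Add32RI8" : "kX86Add32RI";
}

int main() {
  std::printf("%d -> %s\n", 100, PickAddOpcode(100));    // fits in a signed byte
  std::printf("%d -> %s\n", 1000, PickAddOpcode(1000));  // needs a 32-bit immediate
  return 0;
}

The same byte_imm test drives the RI8/RI choice for Or, Adc, And, Sub, Xor, Cmp and Mul in OpRegImm.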