diff options
Diffstat (limited to 'vm/compiler/codegen/arm')
-rw-r--r-- | vm/compiler/codegen/arm/ArchUtility.c | 28 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/ArmLIR.h | 57 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/Assemble.c | 110 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/LocalOptimizations.c | 69 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/Thumb2Util.c | 230 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/ThumbUtil.c | 14 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/armv7-a/ArchVariant.c | 37 |
7 files changed, 434 insertions, 111 deletions
diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c index abcb2eb2d..60c5cdb86 100644 --- a/vm/compiler/codegen/arm/ArchUtility.c +++ b/vm/compiler/codegen/arm/ArchUtility.c @@ -37,6 +37,26 @@ static char * decodeRegList(int vector, char *buf) return buf; } +static int expandImmediate(int value) +{ + int mode = (value & 0xf00) >> 8; + u4 bits = value & 0xff; + switch(mode) { + case 0: + return bits; + case 1: + return (bits << 16) | bits; + case 2: + return (bits << 24) | (bits << 8); + case 3: + return (bits << 24) | (bits << 16) | (bits << 8) | bits; + default: + break; + } + bits = (bits | 0x80) << 24; + return bits >> (((value & 0xf80) >> 7) - 8); +} + /* * Interpret a format string and build a string no longer than size * See format key in Assemble.c. @@ -62,6 +82,10 @@ static void buildInsnString(char *fmt, ArmLIR *lir, char* buf, assert((unsigned)(nc-'0') < 3); operand = lir->operands[nc-'0']; switch(*fmt++) { + case 'm': + operand = expandImmediate(operand); + sprintf(tbuf,"%d [0x%x]", operand, operand); + break; case 's': sprintf(tbuf,"s%d",operand & FP_REG_MASK); break; @@ -71,6 +95,7 @@ static void buildInsnString(char *fmt, ArmLIR *lir, char* buf, case 'h': sprintf(tbuf,"%04x", operand); break; + case 'M': case 'd': sprintf(tbuf,"%d", operand); break; @@ -106,6 +131,9 @@ static void buildInsnString(char *fmt, ArmLIR *lir, char* buf, case ARM_COND_CS: strcpy(tbuf, "bcs"); break; + case ARM_COND_MI: + strcpy(tbuf, "bmi"); + break; default: strcpy(tbuf, ""); break; diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h index b43dab06c..59c752949 100644 --- a/vm/compiler/codegen/arm/ArmLIR.h +++ b/vm/compiler/codegen/arm/ArmLIR.h @@ -54,13 +54,19 @@ /* Offset to distingish FP regs */ #define FP_REG_OFFSET 32 -/* Is reg fp? */ -#define IS_FP_REG(x) (x & FP_REG_OFFSET) +/* Offset to distinguish DP FP regs */ +#define FP_DOUBLE 64 +/* Reg types */ +#define FPREG(x) ((x & FP_REG_OFFSET) == FP_REG_OFFSET) +#define LOWREG(x) ((x & 0x7) == x) +#define DOUBLEREG(x) ((x & FP_DOUBLE) == FP_DOUBLE) +#define SINGLEREG(x) (FPREG(x) && !DOUBLEREG(x)) /* Mask to strip off fp flags */ #define FP_REG_MASK (FP_REG_OFFSET-1) /* Mask to convert high reg to low for Thumb */ #define THUMB_REG_MASK 0x7 + typedef enum NativeRegisterPool { r0 = 0, r1 = 1, @@ -110,6 +116,22 @@ typedef enum NativeRegisterPool { fr29 = 29 + FP_REG_OFFSET, fr30 = 30 + FP_REG_OFFSET, fr31 = 31 + FP_REG_OFFSET, + dr0 = fr0 + FP_DOUBLE, + dr1 = fr2 + FP_DOUBLE, + dr2 = fr4 + FP_DOUBLE, + dr3 = fr6 + FP_DOUBLE, + dr4 = fr8 + FP_DOUBLE, + dr5 = fr10 + FP_DOUBLE, + dr6 = fr12 + FP_DOUBLE, + dr7 = fr14 + FP_DOUBLE, + dr8 = fr16 + FP_DOUBLE, + dr9 = fr18 + FP_DOUBLE, + dr10 = fr20 + FP_DOUBLE, + dr11 = fr22 + FP_DOUBLE, + dr12 = fr24 + FP_DOUBLE, + dr13 = fr26 + FP_DOUBLE, + dr14 = fr28 + FP_DOUBLE, + dr15 = fr30 + FP_DOUBLE, } NativeRegisterPool; /* Thumb condition encodings */ @@ -217,7 +239,6 @@ typedef enum ArmOpCode { THUMB_SUB_SPI7, /* sub(4) [101100001] imm_7[6..0] */ THUMB_SWI, /* swi [11011111] imm_8[7..0] */ THUMB_TST, /* tst [0100001000] rm[5..3] rn[2..0] */ -// FIXME: Enhance assembly encoding. Only low fp regs supported here THUMB2_VLDRS, /* vldr low sx [111011011001] rn[19..16] rd[15-12] [1010] imm_8[7..0] */ THUMB2_VLDRD, /* vldr low dx [111011011001] rn[19..16] rd[15-12] @@ -258,6 +279,30 @@ typedef enum ArmOpCode { [10101100] vm[3..0] */ THUMB2_VSQRTD, /* vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0] */ + THUMB2_MOV_IMM_SHIFT, /* mov(T2) rd, #<const> [11110] i [00001001111] + imm3 rd[11..8] imm8 */ + THUMB2_MOV_IMM16, /* mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] + imm3 rd[11..8] imm8 */ + THUMB2_STR_RRI12, /* str(Imm,T3) rd,[rn,#imm12] [111110001100] + rn[19..16] rt[15..12] imm12[11..0] */ + THUMB2_LDR_RRI12, /* str(Imm,T3) rd,[rn,#imm12] [111110001100] + rn[19..16] rt[15..12] imm12[11..0] */ + THUMB2_STR_RRI8_PREDEC, /* str(Imm,T4) rd,[rn,#-imm8] [111110000100] + rn[19..16] rt[15..12] [1100] imm[7..0]*/ + THUMB2_LDR_RRI8_PREDEC, /* ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] + rn[19..16] rt[15..12] [1100] imm[7..0]*/ + THUMB2_CBNZ, /* cbnz rd,<label> [101110] i [1] imm5[7..3] + rn[2..0] */ + THUMB2_CBZ, /* cbn rd,<label> [101100] i [1] imm5[7..3] + rn[2..0] */ + THUMB2_ADD_RRI12, /* add rd, rn, #imm12 [11110] i [100000] rn[19..16] + [0] imm3[14..12] rd[11..8] imm8[7..0] */ + THUMB2_MOV_RR, /* mov rd, rm [11101010010011110000] rd[11..8] + [0000] rm[3..0] */ + THUMB2_VMOVS, /* vmov.f32 vd, vm [111011101] D [110000] + vd[15..12] 101001] M [0] vm[3..0] */ + THUMB2_VMOVD, /* vmov.f64 vd, vm [111011101] D [110000] + vd[15..12] 101101] M [0] vm[3..0] */ ARM_LAST, } ArmOpCode; @@ -278,8 +323,10 @@ typedef enum ArmEncodingKind { BITBLT, /* Bit string using end/start */ DFP, /* Double FP reg */ SFP, /* Single FP reg */ - IMMSHIFT8, /* Shifted 8-bit immed field using [26,14..12,7..0] */ - IMM12, /* Zero-extended 12-bit immediate using [26,14..12,7..0] */ + MODIMM, /* Shifted 8-bit immediate using [26,14..12,7..0] */ + IMM16, /* Zero-extended immediate using [26,19..16,14..12,7..0] */ + IMM6, /* Encoded branch target using [9,7..3]0 */ + IMM12, /* Zero-extended immediate using [26,14..12,7..0] */ } ArmEncodingKind; /* Struct used to define the snippet positions for each Thumb opcode */ diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c index fb85253ab..ea133e7b1 100644 --- a/vm/compiler/codegen/arm/Assemble.c +++ b/vm/compiler/codegen/arm/Assemble.c @@ -65,6 +65,8 @@ * R -> register list * s -> single precision floating point register * S -> double precision floating point register + * m -> Thumb2 modified immediate + * M -> Thumb2 16-bit zero-extended immediate * * [!] escape. To insert "!", use "!!" */ @@ -365,19 +367,19 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { "tst", "r!0d, r!1d", 1), ENCODING_MAP(THUMB2_VLDRS, 0xed900a00, SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vldr", "!0s, [r!1d, #!2E]", 2), ENCODING_MAP(THUMB2_VLDRD, 0xed900b00, DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vldr", "!0S, [r!1d, #!2E]", 2), ENCODING_MAP(THUMB2_VMULS, 0xee200a00, SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vmuls", "!0s, !1s, !2s", 2), ENCODING_MAP(THUMB2_VMULD, 0xee200b00, DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vmuld", "!0S, !1S, !2S", 2), ENCODING_MAP(THUMB2_VSTRS, 0xed800a00, SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, @@ -389,60 +391,108 @@ ArmEncodingMap EncodingMap[ARM_LAST] = { "vstr", "!0S, [r!1d, #!2E]", 2), ENCODING_MAP(THUMB2_VSUBS, 0xee300a40, SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vsub", "!0s, !1s, !2s", 2), ENCODING_MAP(THUMB2_VSUBD, 0xee300b40, DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vsub", "!0S, !1S, !2S", 2), ENCODING_MAP(THUMB2_VADDS, 0xee300a00, SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vadd", "!0s, !1s, !2s", 2), ENCODING_MAP(THUMB2_VADDD, 0xee300b00, DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vadd", "!0S, !1S, !2S", 2), ENCODING_MAP(THUMB2_VDIVS, 0xee800a00, SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vdivs", "!0s, !1s, !2s", 2), ENCODING_MAP(THUMB2_VDIVD, 0xee800b00, DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, - IS_TERTIARY_OP, + IS_TERTIARY_OP | CLOBBER_DEST, "vdivs", "!0S, !1S, !2S", 2), ENCODING_MAP(THUMB2_VCVTIF, 0xeeb80ac0, SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, - IS_BINARY_OP, + IS_BINARY_OP | CLOBBER_DEST, "vcvt.f32", "!0s, !1s", 2), ENCODING_MAP(THUMB2_VCVTID, 0xeeb80bc0, DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, - IS_BINARY_OP, + IS_BINARY_OP | CLOBBER_DEST, "vcvt.f64", "!0S, !1s", 2), ENCODING_MAP(THUMB2_VCVTFI, 0xeebd0ac0, SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, - IS_BINARY_OP, + IS_BINARY_OP | CLOBBER_DEST, "vcvt.s32.f32 ", "!0s, !1s", 2), ENCODING_MAP(THUMB2_VCVTDI, 0xeebd0bc0, SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, - IS_BINARY_OP, + IS_BINARY_OP | CLOBBER_DEST, "vcvt.s32.f64 ", "!0s, !1S", 2), ENCODING_MAP(THUMB2_VCVTFD, 0xeeb70ac0, DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, - IS_BINARY_OP, + IS_BINARY_OP | CLOBBER_DEST, "vcvt.f64.f32 ", "!0S, !1s", 2), ENCODING_MAP(THUMB2_VCVTDF, 0xeeb70bc0, SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, - IS_BINARY_OP, + IS_BINARY_OP | CLOBBER_DEST, "vcvt.f32.f64 ", "!0s, !1S", 2), ENCODING_MAP(THUMB2_VSQRTS, 0xeeb10ac0, SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, - IS_BINARY_OP, + IS_BINARY_OP | CLOBBER_DEST, "vsqrt.f32 ", "!0s, !1s", 2), ENCODING_MAP(THUMB2_VSQRTD, 0xeeb10bc0, DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, - IS_BINARY_OP, + IS_BINARY_OP | CLOBBER_DEST, "vsqrt.f64 ", "!0S, !1S", 2), + ENCODING_MAP(THUMB2_MOV_IMM_SHIFT, 0xf04f0000, + BITBLT, 11, 8, MODIMM, -1, -1, UNUSED, -1, -1, + IS_BINARY_OP | CLOBBER_DEST, + "mov", "r!0d, #!1m", 2), + ENCODING_MAP(THUMB2_MOV_IMM16, 0xf2400000, + BITBLT, 11, 8, IMM16, -1, -1, UNUSED, -1, -1, + IS_BINARY_OP | CLOBBER_DEST, + "mov", "r!0d, #!1M", 2), + ENCODING_MAP(THUMB2_STR_RRI12, 0xf8c00000, + BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, + IS_TERTIARY_OP, + "str", "r!0d,[r!1d, #!2d", 2), + ENCODING_MAP(THUMB2_LDR_RRI12, 0xf8d00000, + BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0, + IS_TERTIARY_OP | CLOBBER_DEST, + "ldr", "r!0d,[r!1d, #!2d", 2), + ENCODING_MAP(THUMB2_STR_RRI8_PREDEC, 0xf8400c00, + BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0, + IS_TERTIARY_OP, + "str", "r!0d,[r!1d, #-!2d]", 2), + ENCODING_MAP(THUMB2_LDR_RRI8_PREDEC, 0xf8500c00, + BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0, + IS_TERTIARY_OP | CLOBBER_DEST, + "ldr", "r!0d,[r!1d, #-!2d]", 2), + ENCODING_MAP(THUMB2_CBNZ, 0xb900, + BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1, + IS_BINARY_OP, + "cbnz", "r!0d,!1t", 1), + ENCODING_MAP(THUMB2_CBZ, 0xb100, + BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1, + IS_BINARY_OP, + "cbz", "r!0d,!1t", 1), + ENCODING_MAP(THUMB2_ADD_RRI12, 0xf1000000, + BITBLT, 11, 8, BITBLT, 19, 16, IMM12, -1, -1, + IS_TERTIARY_OP | CLOBBER_DEST, + "add", "r!0d,r!1d,#!2d", 2), + ENCODING_MAP(THUMB2_MOV_RR, 0xea4f0000, + BITBLT, 11, 8, BITBLT, 3, 0, UNUSED, -1, -1, + IS_BINARY_OP | CLOBBER_DEST, + "mov", "r!0d, r!1d", 2), + ENCODING_MAP(THUMB2_VMOVS, 0xeeb00a40, + SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, + IS_BINARY_OP | CLOBBER_DEST, + "vmov.f32 ", "!0s, !1s", 2), + ENCODING_MAP(THUMB2_VMOVD, 0xeeb00b40, + DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, + IS_BINARY_OP | CLOBBER_DEST, + "vmov.f64 ", "!0s, !1s", 2), }; #define PADDING_MOV_R0_R0 0x1C00 @@ -508,6 +558,15 @@ static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr) return true; } lir->operands[1] = delta >> 2; + } else if (lir->opCode == THUMB2_CBNZ || lir->opCode == THUMB2_CBZ) { + ArmLIR *targetLIR = (ArmLIR *) lir->generic.target; + intptr_t pc = lir->generic.offset + 4; + intptr_t target = targetLIR->generic.offset; + int delta = target - pc; + if (delta > 126 || delta < 0) { + return true; + } + lir->operands[1] = delta >> 1; } else if (lir->opCode == THUMB_B_COND) { ArmLIR *targetLIR = (ArmLIR *) lir->generic.target; intptr_t pc = lir->generic.offset + 4; @@ -552,6 +611,11 @@ static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr) switch(encoder->fieldLoc[i].kind) { case UNUSED: break; + case IMM6: + value = ((lir->operands[i] & 0x20) >> 5) << 9; + value |= (lir->operands[i] & 0x1f) << 3; + bits |= value; + break; case BITBLT: value = (lir->operands[i] << encoder->fieldLoc[i].start) & ((1 << (encoder->fieldLoc[i].end + 1)) - 1); @@ -575,11 +639,19 @@ static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr) encoder->fieldLoc[i].start; bits |= value; break; - case IMMSHIFT8: case IMM12: + case MODIMM: value = ((lir->operands[i] & 0x800) >> 11) << 26; value |= ((lir->operands[i] & 0x700) >> 8) << 12; value |= lir->operands[i] & 0x0ff; + bits |= value; + break; + case IMM16: + value = ((lir->operands[i] & 0x0800) >> 11) << 26; + value |= ((lir->operands[i] & 0xf000) >> 12) << 16; + value |= ((lir->operands[i] & 0x0700) >> 8) << 12; + value |= lir->operands[i] & 0x0ff; + bits |= value; break; default: assert(0); diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c index 5f43b870a..11aaeddaf 100644 --- a/vm/compiler/codegen/arm/LocalOptimizations.c +++ b/vm/compiler/codegen/arm/LocalOptimizations.c @@ -18,6 +18,27 @@ #include "vm/compiler/CompilerInternals.h" #include "ArmLIR.h" +ArmLIR* dvmCompilerGenCopy(CompilationUnit *cUnit, int rDest, int rSrc); + +/* Is this a Dalvik register access? */ +static inline bool isDalvikLoad(ArmLIR *lir) +{ + return ((lir->operands[1] == rFP) && + ((lir->opCode == THUMB_LDR_RRI5) || + (lir->opCode == THUMB2_LDR_RRI12) || + (lir->opCode == THUMB2_VLDRS) || + (lir->opCode == THUMB2_VLDRD))); +} + +static inline bool isDalvikStore(ArmLIR *lir) +{ + return ((lir->operands[1] == rFP) && + ((lir->opCode == THUMB_STR_RRI5) || + (lir->opCode == THUMB2_STR_RRI12) || + (lir->opCode == THUMB2_VSTRS) || + (lir->opCode == THUMB2_VSTRD))); +} + /* * Perform a pass of top-down walk to * 1) Eliminate redundant loads and stores @@ -37,8 +58,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, if (thisLIR->age >= cUnit->optRound) { continue; } - if (thisLIR->opCode == THUMB_STR_RRI5 && - thisLIR->operands[1] == rFP) { + if (isDalvikStore(thisLIR)) { int dRegId = thisLIR->operands[2]; int nativeRegId = thisLIR->operands[0]; ArmLIR *checkLIR; @@ -49,16 +69,17 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, checkLIR = NEXT_LIR(checkLIR)) { /* Check if a Dalvik register load is redundant */ - if (checkLIR->opCode == THUMB_LDR_RRI5 && - checkLIR->operands[1] == rFP && - checkLIR->operands[2] == dRegId) { + if (isDalvikLoad(checkLIR) && + checkLIR->operands[2] == dRegId ) { + if (FPREG(nativeRegId) != FPREG(checkLIR->operands[0])) { + break; // TODO: handle gen<=>float copies + } /* Insert a move to replace the load */ if (checkLIR->operands[0] != nativeRegId) { - ArmLIR *moveLIR = - dvmCompilerNew(sizeof(ArmLIR), true); - moveLIR->opCode = THUMB_MOV_RR; - moveLIR->operands[0] = checkLIR->operands[0]; - moveLIR->operands[1] = nativeRegId; + ArmLIR *moveLIR; + moveLIR = dvmCompilerRegCopy(cUnit, + checkLIR->operands[0], + nativeRegId); /* * Insertion is guaranteed to succeed since checkLIR * is never the first LIR on the list @@ -70,8 +91,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, continue; /* Found a true output dependency - nuke the previous store */ - } else if (checkLIR->opCode == THUMB_STR_RRI5 && - checkLIR->operands[1] == rFP && + } else if (isDalvikStore(checkLIR) && checkLIR->operands[2] == dRegId) { thisLIR->isNop = true; break; @@ -82,10 +102,6 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, /* Last instruction reached */ stopHere |= checkLIR->generic.next == NULL; - /* Store data is clobbered */ - stopHere |= (EncodingMap[checkLIR->opCode].flags & - CLOBBER_DEST) != 0 && - checkLIR->operands[0] == nativeRegId; /* * Conservatively assume there is a memory dependency * for st/ld multiples and reg+reg address mode @@ -93,16 +109,21 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, stopHere |= checkLIR->opCode == THUMB_STMIA || checkLIR->opCode == THUMB_LDMIA || checkLIR->opCode == THUMB_STR_RRR || - checkLIR->opCode == THUMB_LDR_RRR; + checkLIR->opCode == THUMB_LDR_RRR || + checkLIR->opCode == THUMB2_VLDRD || + checkLIR->opCode == THUMB2_VSTRD; +; -// FIXME: need to enhance this code to sink & play well with coprocessor ld/str - stopHere |= checkLIR->opCode == THUMB2_VSTRS || - checkLIR->opCode == THUMB2_VSTRD || - checkLIR->opCode == THUMB2_VLDRS || - checkLIR->opCode == THUMB2_VLDRD; + if (!isPseudoOpCode(checkLIR->opCode)) { - stopHere |= (EncodingMap[checkLIR->opCode].flags & - IS_BRANCH) != 0; + /* Store data is clobbered */ + stopHere |= (EncodingMap[checkLIR->opCode].flags & + CLOBBER_DEST) != 0 && + checkLIR->operands[0] == nativeRegId; + + stopHere |= (EncodingMap[checkLIR->opCode].flags & + IS_BRANCH) != 0; + } /* Found a new place to put the store - move it here */ if (stopHere == true) { diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c index 1dd009b4e..3a9f1de1e 100644 --- a/vm/compiler/codegen/arm/Thumb2Util.c +++ b/vm/compiler/codegen/arm/Thumb2Util.c @@ -45,6 +45,7 @@ static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit, ArmConditionCode cond, int reg, int checkValue, int dOffset, ArmLIR *pcrLabel); +ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc); /*****************************************************************************/ @@ -132,14 +133,96 @@ static inline int selectFirstRegister(CompilationUnit *cUnit, int vSrc, /*****************************************************************************/ +ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc) +{ + ArmLIR* res = dvmCompilerNew(sizeof(ArmLIR), true); + res->operands[0] = rDest; + res->operands[1] = rSrc; + if (rDest == rSrc) { + res->isNop = true; + } else { + if (LOWREG(rDest) && LOWREG(rSrc)) { + res->opCode = THUMB_MOV_RR; + } else if (FPREG(rDest) && FPREG(rSrc)) { + if (DOUBLEREG(rDest)) { + assert(DOUBLEREG(rSrc)); + res->opCode = THUMB2_VMOVD; + } else { + assert(SINGLEREG(rSrc)); + res->opCode = THUMB2_VMOVS; + } + } else { + // TODO: support copy between FP and gen regs. + assert(!FPREG(rDest)); + assert(!FPREG(rSrc)); + res->opCode = THUMB2_MOV_RR; + } + } + return res; +} + +static int leadingZeros(u4 val) +{ + u4 alt; + int n; + int count; + + count = 16; + n = 32; + do { + alt = val >> count; + if (alt != 0) { + n = n - count; + val = alt; + } + count >>= 1; + } while (count); + return n - val; +} + +/* + * Determine whether value can be encoded as a Thumb modified + * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form. + */ +static int modifiedImmediate(u4 value) +{ + int zLeading; + int zTrailing; + u4 b0 = value & 0xff; + + /* Note: case of value==0 must use 0:000:0:0000000 encoding */ + if (value <= 0xFF) + return b0; // 0:000:a:bcdefgh + if (value == ((b0 << 16) | b0)) + return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */ + if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0)) + return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */ + b0 = (value >> 8) & 0xff; + if (value == ((b0 << 24) | (b0 << 8))) + return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */ + /* Can we do it with rotation? */ + zLeading = leadingZeros(value); + zTrailing = 32 - leadingZeros(~value & (value - 1)); + /* A run of eight or fewer active bits? */ + if ((zLeading + zTrailing) < 24) + return -1; /* No - bail */ + /* left-justify the constant, discarding msb (known to be 1) */ + value <<= zLeading + 1; + /* Create bcdefgh */ + value >>= 25; + /* Put it all together */ + return value | ((0x8 + zLeading) << 7); /* [01000..11111]:bcdefgh */ +} + /* * Load a immediate using a shortcut if possible; otherwise * grab from the per-translation literal pool */ static void loadConstant(CompilationUnit *cUnit, int rDest, int value) { + int modImm; /* See if the value can be constructed cheaply */ - if ((value >= 0) && (value <= 255)) { + if ((value & 0xff) == value) { newLIR2(cUnit, THUMB_MOV_IMM, rDest, value); return; } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) { @@ -147,6 +230,17 @@ static void loadConstant(CompilationUnit *cUnit, int rDest, int value) newLIR2(cUnit, THUMB_MVN, rDest, rDest); return; } + /* Check Modified immediate special cases */ + modImm = modifiedImmediate(value); + if (modImm >= 0) { + newLIR2(cUnit, THUMB2_MOV_IMM_SHIFT, rDest, modImm); + return; + } + /* 16-bit immediate? */ + if ((value & 0xffff) == value) { + newLIR2(cUnit, THUMB2_MOV_IMM16, rDest, value); + return; + } /* No shortcut - go ahead and use literal pool */ ArmLIR *dataTarget = scanLiteralPool(cUnit, value, 255); if (dataTarget == NULL) { @@ -172,9 +266,8 @@ static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr) { int offset = offsetof(StackSaveArea, xtra.currentPc); loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset)); - newLIR2(cUnit, THUMB_MOV_RR, rAddr, rFP); - newLIR2(cUnit, THUMB_SUB_RI8, rAddr, sizeof(StackSaveArea) - offset); - newLIR3(cUnit, THUMB_STR_RRI5, rDPC, rAddr, 0); + newLIR3(cUnit, THUMB2_STR_RRI8_PREDEC, rDPC, rFP, + sizeof(StackSaveArea) - offset); } /* Generate conditional branch instructions */ @@ -201,22 +294,20 @@ static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target) static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo, int rDestHi) { + bool allLowRegs = (LOWREG(rDestLo) && LOWREG(rDestHi)); + /* Use reg + imm5*4 to load the values if possible */ - if (vSrc <= 30) { + if (allLowRegs && vSrc <= 30) { newLIR3(cUnit, THUMB_LDR_RRI5, rDestLo, rFP, vSrc); newLIR3(cUnit, THUMB_LDR_RRI5, rDestHi, rFP, vSrc+1); } else { - if (vSrc <= 64) { - /* Sneak 4 into the base address first */ - newLIR3(cUnit, THUMB_ADD_RRI3, rDestLo, rFP, 4); - newLIR2(cUnit, THUMB_ADD_RI8, rDestLo, (vSrc-1)*4); + assert(rDestLo < rDestHi); + loadValueAddress(cUnit, vSrc, rDestLo); + if (allLowRegs) { + newLIR2(cUnit, THUMB_LDMIA, rDestLo, (1<<rDestLo) | (1<<(rDestHi))); } else { - /* Offset too far from rFP */ - loadConstant(cUnit, rDestLo, vSrc*4); - newLIR3(cUnit, THUMB_ADD_RRR, rDestLo, rFP, rDestLo); + assert(0); // Unimp - need Thumb2 ldmia } - assert(rDestLo < rDestHi); - newLIR2(cUnit, THUMB_LDMIA, rDestLo, (1<<rDestLo) | (1<<(rDestHi))); } } @@ -227,49 +318,74 @@ static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo, static void storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi, int vDest, int rScratch) { + bool allLowRegs = (LOWREG(rSrcLo) && LOWREG(rSrcHi)); killNullCheckedRegister(cUnit, vDest); killNullCheckedRegister(cUnit, vDest+1); updateLiveRegisterPair(cUnit, vDest, rSrcLo, rSrcHi); /* Use reg + imm5*4 to store the values if possible */ - if (vDest <= 30) { + if (allLowRegs && vDest <= 30) { newLIR3(cUnit, THUMB_STR_RRI5, rSrcLo, rFP, vDest); newLIR3(cUnit, THUMB_STR_RRI5, rSrcHi, rFP, vDest+1); } else { - if (vDest <= 64) { - /* Sneak 4 into the base address first */ - newLIR3(cUnit, THUMB_ADD_RRI3, rScratch, rFP, 4); - newLIR2(cUnit, THUMB_ADD_RI8, rScratch, (vDest-1)*4); + assert(rSrcLo < rSrcHi); + loadValueAddress(cUnit, vDest, rScratch); + if (allLowRegs) { + newLIR2(cUnit, THUMB_STMIA, rScratch, + (1<<rSrcLo) | (1 << (rSrcHi))); } else { - /* Offset too far from rFP */ - loadConstant(cUnit, rScratch, vDest*4); - newLIR3(cUnit, THUMB_ADD_RRR, rScratch, rFP, rScratch); + assert(0); // Unimp - need Thumb2 stmia } - assert(rSrcLo < rSrcHi); - newLIR2(cUnit, THUMB_STMIA, rScratch, (1<<rSrcLo) | (1 << (rSrcHi))); } } -/* Load the address of a Dalvik register on the frame */ -static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest) +static void addRegisterRegister(CompilationUnit *cUnit, int rDest, + int rSrc1, int rSrc2) { - /* RRI3 can add up to 7 */ - if (vSrc <= 1) { - newLIR3(cUnit, THUMB_ADD_RRI3, rDest, rFP, vSrc*4); - } else if (vSrc <= 64) { - /* Sneak 4 into the base address first */ - newLIR3(cUnit, THUMB_ADD_RRI3, rDest, rFP, 4); - newLIR2(cUnit, THUMB_ADD_RI8, rDest, (vSrc-1)*4); + if (!LOWREG(rDest) || !LOWREG(rSrc1) || !LOWREG(rSrc2)) { + assert(0); // Unimp + //newLIR3(cUnit, THUMB2_ADD_RRR, rDest, rFP, rDest); } else { - loadConstant(cUnit, rDest, vSrc*4); newLIR3(cUnit, THUMB_ADD_RRR, rDest, rFP, rDest); } } +/* Add in immediate to a register. */ +static void addRegisterImmediate(CompilationUnit *cUnit, int rDest, int rSrc, + int value) +{ +// TODO: check for modified immediate form + if (LOWREG(rDest) && LOWREG(rSrc) && (value <= 7)) { + newLIR3(cUnit, THUMB_ADD_RRI3, rDest, rSrc, value); + } else if (LOWREG(rDest) && (rDest == rSrc) && ((value & 0xff) == 0xff)) { + newLIR2(cUnit, THUMB_ADD_RI8, rDest, value); + } else if (value <= 4095) { + newLIR3(cUnit, THUMB2_ADD_RRI12, rDest, rSrc, value); + } else { + loadConstant(cUnit, rDest, value); + addRegisterRegister(cUnit, rDest, rDest, rFP); + } +} + +/* Load the address of a Dalvik register on the frame */ +static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int rDest) +{ + addRegisterImmediate(cUnit, rDest, rFP, vSrc*4); +} + +/* + * FIXME: We need a general register temp for all of these coprocessor + * operations in case we can't reach in 1 shot. Might just want to + * designate a hot temp that all codegen routines could use in their + * scope. Alternately, callers will need to allocate a temp and + * pass it in to each of these. + */ + /* Load a float from a Dalvik register */ static void loadFloat(CompilationUnit *cUnit, int vSrc, int rDest) { assert(vSrc <= 255); // FIXME - temp limit to 1st 256 + assert(SINGLEREG(rDest)); newLIR3(cUnit, THUMB2_VLDRS, rDest, rFP, vSrc); } @@ -278,6 +394,7 @@ static void storeFloat(CompilationUnit *cUnit, int rSrc, int vDest, int rScratch) { assert(vSrc <= 255); // FIXME - temp limit to 1st 256 + assert(SINGLEREG(rSrc)); newLIR3(cUnit, THUMB2_VSTRS, rSrc, rFP, vDest); } @@ -285,6 +402,7 @@ static void storeFloat(CompilationUnit *cUnit, int rSrc, int vDest, static void loadDouble(CompilationUnit *cUnit, int vSrc, int rDest) { assert(vSrc <= 255); // FIXME - temp limit to 1st 256 + assert(DOUBLEREG(rDest)); newLIR3(cUnit, THUMB2_VLDRD, rDest, rFP, vSrc); } @@ -293,6 +411,7 @@ static void storeDouble(CompilationUnit *cUnit, int rSrc, int vDest, int rScratch) { assert(vSrc <= 255); // FIXME - temp limit to 1st 256 + assert(DOUBLEREG(rSrc)); newLIR3(cUnit, THUMB2_VSTRD, rSrc, rFP, vDest); } @@ -300,26 +419,27 @@ static void storeDouble(CompilationUnit *cUnit, int rSrc, int vDest, /* Load a single value from rFP[src] and store them into rDest */ static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest) { - /* Use reg + imm5*4 to load the value if possible */ - if (vSrc <= 31) { - newLIR3(cUnit, THUMB_LDR_RRI5, rDest, rFP, vSrc); - } else { - loadConstant(cUnit, rDest, vSrc*4); - newLIR3(cUnit, THUMB_LDR_RRR, rDest, rFP, rDest); - } + loadWordDisp(cUnit, rFP, vSrc * 4, rDest); } /* Load a word at base + displacement. Displacement must be word multiple */ static void loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement, int rDest) { + bool allLowRegs = (LOWREG(rBase) && LOWREG(rDest)); assert((displacement & 0x3) == 0); /* Can it fit in a RRI5? */ - if (displacement < 128) { + if (allLowRegs && displacement < 128) { newLIR3(cUnit, THUMB_LDR_RRI5, rDest, rBase, displacement >> 2); + } else if (displacement < 4092) { + newLIR3(cUnit, THUMB2_LDR_RRI12, rDest, rFP, displacement); } else { loadConstant(cUnit, rDest, displacement); - newLIR3(cUnit, THUMB_LDR_RRR, rDest, rBase, rDest); + if (allLowRegs) { + newLIR3(cUnit, THUMB_LDR_RRR, rDest, rBase, rDest); + } else { + assert(0); // Unimp - need Thumb2 ldr_rrr + } } } @@ -331,11 +451,17 @@ static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest, updateLiveRegister(cUnit, vDest, rSrc); /* Use reg + imm5*4 to store the value if possible */ - if (vDest <= 31) { + if (LOWREG(rSrc) && vDest <= 31) { newLIR3(cUnit, THUMB_STR_RRI5, rSrc, rFP, vDest); + } else if (vDest <= 1023) { + newLIR3(cUnit, THUMB2_STR_RRI12, rSrc, rFP, vDest*4); } else { loadConstant(cUnit, rScratch, vDest*4); - newLIR3(cUnit, THUMB_STR_RRR, rSrc, rFP, rScratch); + if (LOWREG(rSrc)) { + newLIR3(cUnit, THUMB_STR_RRR, rSrc, rFP, rScratch); + } else { + assert(0); // Unimp: Need generic str_rrr routine + } } } @@ -343,12 +469,20 @@ static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest, * Perform a "reg cmp imm" operation and jump to the PCR region if condition * satisfies. */ -static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit, +static ArmLIR *genRegImmCheck(CompilationUnit *cUnit, ArmConditionCode cond, int reg, int checkValue, int dOffset, ArmLIR *pcrLabel) { - newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue); - ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond); + ArmLIR *branch; + if ((LOWREG(reg)) && (checkValue == 0) && + ((cond == ARM_COND_EQ) || (cond == ARM_COND_NE))) { + branch = newLIR2(cUnit, + (cond == ARM_COND_EQ) ? THUMB2_CBZ : THUMB2_CBNZ, + reg, 0); + } else { + newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue); + branch = newLIR2(cUnit, THUMB_B_COND, 0, cond); + } return genCheckCommon(cUnit, dOffset, branch, pcrLabel); } diff --git a/vm/compiler/codegen/arm/ThumbUtil.c b/vm/compiler/codegen/arm/ThumbUtil.c index 69bb0f75d..8be50add8 100644 --- a/vm/compiler/codegen/arm/ThumbUtil.c +++ b/vm/compiler/codegen/arm/ThumbUtil.c @@ -45,6 +45,7 @@ static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit, ArmConditionCode cond, int reg, int checkValue, int dOffset, ArmLIR *pcrLabel); +ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc); /*****************************************************************************/ @@ -132,6 +133,19 @@ static inline int selectFirstRegister(CompilationUnit *cUnit, int vSrc, /*****************************************************************************/ +ArmLIR* dvmCompilerRegCopy(CompilationUnit *cUnit, int rDest, int rSrc) +{ + ArmLIR* res = dvmCompilerNew(sizeof(ArmLIR), true); + assert(LOWREG(rDest) && LOWREG(rSrc)); + res->operands[0] = rDest; + res->operands[1] = rSrc; + res->opCode = THUMB_MOV_RR; + if (rDest == rSrc) { + res->isNop = true; + } + return res; +} + /* * Load a immediate using a shortcut if possible; otherwise * grab from the per-translation literal pool diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c index 794d75484..92097af59 100644 --- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c +++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c @@ -119,11 +119,11 @@ static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir) { int offset = offsetof(InterpState, retval); int vSrc = mir->dalvikInsn.vA; - loadDouble(cUnit, vSrc, fr2); - newLIR2(cUnit, THUMB2_VSQRTD, fr0, fr2); + loadDouble(cUnit, vSrc, dr1); + newLIR2(cUnit, THUMB2_VSQRTD, dr0, dr1); assert(offset & 0x3 == 0); /* Must be word aligned */ assert(offset < 1024); - newLIR3(cUnit, THUMB2_VSTRD, fr0, rGLUE, offset >> 2); + newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2); return true; } @@ -212,10 +212,10 @@ static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest, default: return true; } - loadDouble(cUnit, vSrc1, fr2); - loadDouble(cUnit, vSrc2, fr4); - newLIR3(cUnit, op, fr0, fr2, fr4); - storeDouble(cUnit, fr0, vDest, 0); + loadDouble(cUnit, vSrc1, dr1); + loadDouble(cUnit, vSrc2, dr2); + newLIR3(cUnit, op, dr0, dr1, dr2); + storeDouble(cUnit, dr0, vDest, 0); return false; } @@ -227,6 +227,8 @@ static bool genConversion(CompilationUnit *cUnit, MIR *mir) int op = THUMB_BKPT; bool longSrc = false; bool longDest = false; + int srcReg; + int tgtReg; switch (opCode) { case OP_INT_TO_FLOAT: @@ -267,15 +269,20 @@ static bool genConversion(CompilationUnit *cUnit, MIR *mir) default: return true; } - if (longSrc) - loadDouble(cUnit, vSrc2, fr2); - else - loadFloat(cUnit, vSrc2, fr2); - newLIR2(cUnit, op, fr0, fr2); - if (longDest) - storeDouble(cUnit, fr0, vSrc1Dest, 0); - else + if (longSrc) { + srcReg = dr1; + loadDouble(cUnit, vSrc2, srcReg); + } else { + srcReg = fr2; + loadFloat(cUnit, vSrc2, srcReg); + } + if (longDest) { + newLIR2(cUnit, op, dr0, srcReg); + storeDouble(cUnit, dr0, vSrc1Dest, 0); + } else { + newLIR2(cUnit, op, fr0, srcReg); storeFloat(cUnit, fr0, vSrc1Dest, 0); + } return false; } |