author     Ben Cheng <bccheng@google.com>    2009-09-22 11:23:36 -0700
committer  Ben Cheng <bccheng@google.com>    2009-09-25 09:37:09 -0700
commit     d7d426a1d746f70edeaeccf77886f3ad8298e28c (patch)
tree       f5afc7ee0a7ac0bdfa7efd588e2b0142e76fe3eb
parent     4d98401f36cc1ee3580c2dff1d740e850f854d04 (diff)
Implemented a new scheduler and FP register allocator.
Improved performance by 50% over existing JIT for some FP benchmarks.
-rw-r--r--  vm/compiler/Compiler.c                               5
-rw-r--r--  vm/compiler/CompilerUtility.h                        5
-rw-r--r--  vm/compiler/codegen/Optimizer.h                      2
-rw-r--r--  vm/compiler/codegen/arm/ArchUtility.c               72
-rw-r--r--  vm/compiler/codegen/arm/ArmLIR.h                    35
-rw-r--r--  vm/compiler/codegen/arm/Assemble.c                  44
-rw-r--r--  vm/compiler/codegen/arm/Codegen.c                  122
-rw-r--r--  vm/compiler/codegen/arm/Codegen.h                    6
-rw-r--r--  vm/compiler/codegen/arm/LocalOptimizations.c       228
-rw-r--r--  vm/compiler/codegen/arm/Thumb2Util.c                156
-rw-r--r--  vm/compiler/codegen/arm/ThumbUtil.c                  41
-rw-r--r--  vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c     2
-rw-r--r--  vm/compiler/codegen/arm/armv5te/ArchVariant.c         2
-rw-r--r--  vm/compiler/codegen/arm/armv7-a/ArchVariant.c        87
14 files changed, 575 insertions, 232 deletions
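The scheduling and allocation changes below all hang off one idea: every LIR carries useMask/defMask resource vectors (plus a new aliasInfo field for Dalvik-register accesses), and the optimizer reorders instructions only when those vectors do not conflict. The following is a minimal sketch of that dependence test, illustrative only and not taken from the patch (the struct and the helper canReorder are invented names):

    /* Illustrative sketch: a simplified model of the useMask/defMask
     * scheme this patch introduces; canReorder() is a hypothetical helper. */
    typedef unsigned long long u8mask;

    struct SketchLIR {
        u8mask useMask;   /* resources the instruction reads  */
        u8mask defMask;   /* resources the instruction writes */
    };

    /* Two instructions may swap order only if neither clobbers something
     * the other reads or writes (no RAW, WAR, or WAW hazard). */
    static int canReorder(const struct SketchLIR *a, const struct SketchLIR *b)
    {
        return ((a->defMask & (b->useMask | b->defMask)) == 0) &&
               ((b->defMask & a->useMask) == 0);
    }

In the patch itself, branches set the PC bit in both masks and barrier pseudo-ops set defMask to all ones, so nothing migrates across them.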
diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c
index 47881cadd..39a988d94 100644
--- a/vm/compiler/Compiler.c
+++ b/vm/compiler/Compiler.c
@@ -227,11 +227,6 @@ bool dvmCompilerStartup(void)
/* Track method-level compilation statistics */
gDvmJit.methodStatsTable = dvmHashTableCreate(32, NULL);
- /*
- * FIXME - temporarily disable optimizations for this intermediate checkin
- */
- gDvmJit.disableOpt = 0x3;
-
dvmUnlockMutex(&gDvmJit.compilerLock);
return true;
diff --git a/vm/compiler/CompilerUtility.h b/vm/compiler/CompilerUtility.h
index c72a8da34..409de2967 100644
--- a/vm/compiler/CompilerUtility.h
+++ b/vm/compiler/CompilerUtility.h
@@ -40,11 +40,14 @@ typedef struct GrowableList {
#define GET_ELEM_N(LIST, TYPE, N) (((TYPE*) LIST->elemList)[N])
+struct LIR;
+
void dvmInitGrowableList(GrowableList *gList, size_t initLength);
void dvmInsertGrowableList(GrowableList *gList, void *elem);
-
BitVector* dvmCompilerAllocBitVector(int startBits, bool expandable);
bool dvmCompilerSetBit(BitVector* pBits, int num);
void dvmDebugBitVector(char *msg, const BitVector *bv, int length);
+void dvmDumpLIRInsn(struct LIR *lir, unsigned char *baseAddr);
+void dvmDumpResourceMask(struct LIR *lir, u8 mask, const char *prefix);
#endif /* _DALVIK_COMPILER_UTILITY */
diff --git a/vm/compiler/codegen/Optimizer.h b/vm/compiler/codegen/Optimizer.h
index 0931df3d0..487e8648e 100644
--- a/vm/compiler/codegen/Optimizer.h
+++ b/vm/compiler/codegen/Optimizer.h
@@ -44,6 +44,8 @@ typedef struct RegisterScoreboard {
int nativeReg; // And the mapped native register
int nativeRegHi; // And the mapped native register
bool isWide; // Whether a pair of registers are alive
+ int fp[32]; // Track the Dalvik register held in a SFP reg
+ int nextFP; // Next index for FP register allocation
} RegisterScoreboard;
void dvmCompilerApplyLocalOptimizations(struct CompilationUnit *cUnit,
diff --git a/vm/compiler/codegen/arm/ArchUtility.c b/vm/compiler/codegen/arm/ArchUtility.c
index 551e1f753..d1f9870ff 100644
--- a/vm/compiler/codegen/arm/ArchUtility.c
+++ b/vm/compiler/codegen/arm/ArchUtility.c
@@ -110,9 +110,6 @@ static void buildInsnString(char *fmt, ArmLIR *lir, char* buf,
case 'd':
sprintf(tbuf,"%d", operand);
break;
- case 'D':
- sprintf(tbuf,"%d", operand+8);
- break;
case 'E':
sprintf(tbuf,"%d", operand*4);
break;
@@ -193,8 +190,49 @@ static void buildInsnString(char *fmt, ArmLIR *lir, char* buf,
*buf = 0;
}
+void dvmDumpResourceMask(LIR *lir, u8 mask, const char *prefix)
+{
+ char buf[256];
+ buf[0] = 0;
+ ArmLIR *armLIR = (ArmLIR *) lir;
+
+ if (mask == ENCODE_ALL) {
+ strcpy(buf, "all");
+ } else {
+ char num[8];
+ int i;
+
+ for (i = 0; i < kRegEnd; i++) {
+ if (mask & (1ULL << i)) {
+ sprintf(num, "%d ", i);
+ strcat(buf, num);
+ }
+ }
+
+ if (mask & ENCODE_CCODE) {
+ strcat(buf, "cc ");
+ }
+ if (mask & ENCODE_FP_STATUS) {
+ strcat(buf, "fpcc ");
+ }
+ if (armLIR && (mask & ENCODE_DALVIK_REG)) {
+ sprintf(buf + strlen(buf), "dr%d%s", armLIR->aliasInfo & 0xffff,
+ (armLIR->aliasInfo & 0x80000000) ? "(+1)" : "");
+ }
+ }
+ if (buf[0]) {
+ LOGD("%s: %s", prefix, buf);
+ }
+}
+
+/*
+ * Debugging macros
+ */
+#define DUMP_RESOURCE_MASK(X)
+#define DUMP_SSA_REP(X)
+
/* Pretty-print a LIR instruction */
-static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr)
+void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr)
{
ArmLIR *lir = (ArmLIR *) arg;
char buf[256];
@@ -202,15 +240,17 @@ static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr)
int offset = lir->generic.offset;
int dest = lir->operands[0];
u2 *cPtr = (u2*)baseAddr;
+ const bool dumpNop = false;
+
/* Handle pseudo-ops individually, and all regular insns as a group */
switch(lir->opCode) {
- case ARM_PSEUDO_IT_BOTTOM:
- LOGD("-------- IT_Bottom");
+ case ARM_PSEUDO_BARRIER:
+ LOGD("-------- BARRIER");
break;
case ARM_PSEUDO_EXTENDED_MIR:
/* intentional fallthrough */
case ARM_PSEUDO_SSA_REP:
- LOGD("-------- %s\n", (char *) dest);
+ DUMP_SSA_REP(LOGD("-------- %s\n", (char *) dest));
break;
case ARM_PSEUDO_TARGET_LABEL:
break;
@@ -258,17 +298,27 @@ static void dumpLIRInsn(LIR *arg, unsigned char *baseAddr)
LOGD("L%#06x:\n", dest);
break;
default:
- if (lir->isNop) {
+ if (lir->isNop && !dumpNop) {
break;
}
buildInsnString(EncodingMap[lir->opCode].name, lir, opName,
baseAddr, 256);
buildInsnString(EncodingMap[lir->opCode].fmt, lir, buf, baseAddr,
256);
- LOGD("%p (%04x): %-8s%s\n",
- baseAddr + offset, offset, opName, buf);
+ LOGD("%p (%04x): %-8s%s%s\n",
+ baseAddr + offset, offset, opName, buf,
+ lir->isNop ? "(nop)" : "");
break;
}
+
+ if (lir->useMask && (!lir->isNop || dumpNop)) {
+ DUMP_RESOURCE_MASK(dvmDumpResourceMask((LIR *) lir,
+ lir->useMask, "use"));
+ }
+ if (lir->defMask && (!lir->isNop || dumpNop)) {
+ DUMP_RESOURCE_MASK(dvmDumpResourceMask((LIR *) lir,
+ lir->defMask, "def"));
+ }
}
/* Dump instructions and constant pool contents */
@@ -281,7 +331,7 @@ void dvmCompilerCodegenDump(CompilationUnit *cUnit)
LOGD("installed code is at %p\n", cUnit->baseAddr);
LOGD("total size is %d bytes\n", cUnit->totalSize);
for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) {
- dumpLIRInsn(lirInsn, cUnit->baseAddr);
+ dvmDumpLIRInsn(lirInsn, cUnit->baseAddr);
}
for (lirInsn = cUnit->wordList; lirInsn; lirInsn = lirInsn->next) {
armLIR = (ArmLIR *) lirInsn;
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index 20fb6bf66..8fedda24c 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -66,6 +66,7 @@
/* Offset to distinguish DP FP regs */
#define FP_DOUBLE 64
/* Reg types */
+#define REGTYPE(x) (x & (FP_REG_OFFSET | FP_DOUBLE))
#define FPREG(x) ((x & FP_REG_OFFSET) == FP_REG_OFFSET)
#define LOWREG(x) ((x & 0x7) == x)
#define DOUBLEREG(x) ((x & FP_DOUBLE) == FP_DOUBLE)
@@ -83,21 +84,23 @@ typedef enum ResourceEncodingPos {
kRegLR = 14,
kRegPC = 15,
kFPReg0 = 16,
- kITBlock = 48,
- kCCode = 49,
- kFPStatus = 50,
+ kRegEnd = 48,
+ kCCode = kRegEnd,
+ kFPStatus,
+ kDalvikReg,
} ResourceEncodingPos;
-#define ENCODE_GP_REG(N) (1ULL << N)
#define ENCODE_REG_LIST(N) ((u8) N)
#define ENCODE_REG_SP (1ULL << kRegSP)
#define ENCODE_REG_LR (1ULL << kRegLR)
#define ENCODE_REG_PC (1ULL << kRegPC)
-#define ENCODE_SFP_REG(N) (1ULL << (N - FP_REG_OFFSET + kFPReg0))
-#define ENCODE_DFP_REG(N) (3ULL << (((N - FP_DOUBLE) << 1) + kFPReg0))
-#define ENCODE_IT_BLOCK (1ULL << kITBlock)
#define ENCODE_CCODE (1ULL << kCCode)
#define ENCODE_FP_STATUS (1ULL << kFPStatus)
+#define ENCODE_DALVIK_REG (1ULL << kDalvikReg)
+#define ENCODE_ALL (~0ULL)
+
+#define DECODE_ALIAS_INFO_REG(X) (X & 0xffff)
+#define DECODE_ALIAS_INFO_WIDE(X) ((X & 0x80000000) ? 1 : 0)
typedef enum OpSize {
WORD,
@@ -240,7 +243,7 @@ typedef enum ArmConditionCode {
* Assemble.c.
*/
typedef enum ArmOpCode {
- ARM_PSEUDO_IT_BOTTOM = -17,
+ ARM_PSEUDO_BARRIER = -17,
ARM_PSEUDO_EXTENDED_MIR = -16,
ARM_PSEUDO_SSA_REP = -15,
ARM_PSEUDO_ENTRY_BLOCK = -14,
@@ -532,6 +535,7 @@ typedef enum ArmOpFeatureFlags {
kRegDef0,
kRegDef1,
kRegDefSP,
+ kRegDefLR,
kRegDefList0,
kRegDefList1,
kRegUse0,
@@ -555,13 +559,14 @@ typedef enum ArmOpFeatureFlags {
#define REG_DEF0 (1 << kRegDef0)
#define REG_DEF1 (1 << kRegDef1)
#define REG_DEF_SP (1 << kRegDefSP)
+#define REG_DEF_LR (1 << kRegDefLR)
#define REG_DEF_LIST0 (1 << kRegDefList0)
#define REG_DEF_LIST1 (1 << kRegDefList1)
#define REG_USE0 (1 << kRegUse0)
#define REG_USE1 (1 << kRegUse1)
#define REG_USE2 (1 << kRegUse2)
-#define REG_USE_PC (1 << kRegUsePC)
#define REG_USE_SP (1 << kRegUseSP)
+#define REG_USE_PC (1 << kRegUsePC)
#define REG_USE_LIST0 (1 << kRegUseList0)
#define REG_USE_LIST1 (1 << kRegUseList1)
#define NO_OPERAND (1 << kNoOperand)
@@ -574,11 +579,14 @@ typedef enum ArmOpFeatureFlags {
#define USES_CCODES (1 << kUsesCCodes)
/* Common combo register usage patterns */
-#define REG_DEF0_USE1 (REG_DEF0 | REG_USE1)
-#define REG_DEF0_USE01 (REG_DEF0 | REG_USE0 | REG_USE1)
-#define REG_DEF0_USE12 (REG_DEF0 | REG_USE1 | REG_USE2)
#define REG_USE01 (REG_USE0 | REG_USE1)
-#define REG_USE012 (REG_USE0 | REG_USE1 | REG_USE2)
+#define REG_USE012 (REG_USE01 | REG_USE2)
+#define REG_USE12 (REG_USE1 | REG_USE2)
+#define REG_DEF0_USE0 (REG_DEF0 | REG_USE0)
+#define REG_DEF0_USE1 (REG_DEF0 | REG_USE1)
+#define REG_DEF0_USE01 (REG_DEF0 | REG_USE01)
+#define REG_DEF0_USE12 (REG_DEF0 | REG_USE12)
+#define REG_DEF01_USE2 (REG_DEF0 | REG_DEF1 | REG_USE2)
/* Instruction assembly fieldLoc kind */
typedef enum ArmEncodingKind {
@@ -635,6 +643,7 @@ typedef struct ArmLIR {
bool isNop; // LIR is optimized away
int age; // default is 0, set lazily by the optimizer
int size; // 16-bit unit size (1 for thumb, 1 or 2 for thumb2)
+ int aliasInfo; // For Dalvik register access disambiguation
u8 useMask; // Resource mask for use
u8 defMask; // Resource mask for def
} ArmLIR;
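The aliasInfo field added above packs the Dalvik register number into the low 16 bits and flags a 64-bit access in bit 31, which is exactly what DECODE_ALIAS_INFO_REG/WIDE unpack. A tiny self-contained check of that encoding (illustrative only; the macros mirror the hunk above, with parentheses added):

    #include <assert.h>

    #define DECODE_ALIAS_INFO_REG(X)  ((X) & 0xffff)
    #define DECODE_ALIAS_INFO_WIDE(X) (((X) & 0x80000000) ? 1 : 0)

    int main(void)
    {
        int aliasInfo = 5;           /* access to Dalvik register v5 ...  */
        aliasInfo |= 0x80000000;     /* ... marked as a wide (v5/v6) pair */

        assert(DECODE_ALIAS_INFO_REG(aliasInfo) == 5);
        assert(DECODE_ALIAS_INFO_WIDE(aliasInfo) == 1);
        return 0;
    }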
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 4d7dadc36..6147fd6e1 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -56,7 +56,6 @@
* [f]ormats:
* h -> 4-digit hex
* d -> decimal
- * D -> decimal+8 (used to convert 3-bit regnum field to high reg)
* E -> decimal*4
* F -> decimal*2
* c -> branch condition (beq, bne, etc.)
@@ -89,7 +88,7 @@ ArmEncodingMap EncodingMap[ARM_LAST] = {
"adds", "r!0d, r!1d, #!2d", 1),
ENCODING_MAP(THUMB_ADD_RI8, 0x3000,
BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP | REG_DEF0 | REG_USE0 | SETS_CCODES,
+ IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
"adds", "r!0d, r!0d, #!1d", 1),
ENCODING_MAP(THUMB_ADD_RRR, 0x1800,
BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
@@ -149,23 +148,23 @@ ArmEncodingMap EncodingMap[ARM_LAST] = {
"bkpt", "!0d", 1),
ENCODING_MAP(THUMB_BLX_1, 0xf000,
BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP | IS_BRANCH,
+ IS_BINARY_OP | IS_BRANCH | REG_DEF_LR,
"blx_1", "!0u", 1),
ENCODING_MAP(THUMB_BLX_2, 0xe800,
BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP | IS_BRANCH,
+ IS_BINARY_OP | IS_BRANCH | REG_DEF_LR,
"blx_2", "!0v", 1),
ENCODING_MAP(THUMB_BL_1, 0xf000,
BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_UNARY_OP | IS_BRANCH,
+ IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
"bl_1", "!0u", 1),
ENCODING_MAP(THUMB_BL_2, 0xf800,
BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_UNARY_OP | IS_BRANCH,
+ IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
"bl_2", "!0v", 1),
ENCODING_MAP(THUMB_BLX_R, 0x4780,
BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_UNARY_OP | IS_BRANCH,
+ IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
"blx", "r!0d", 1),
ENCODING_MAP(THUMB_BX, 0x4700,
BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
@@ -186,22 +185,22 @@ ArmEncodingMap EncodingMap[ARM_LAST] = {
ENCODING_MAP(THUMB_CMP_LH, 0x4540,
BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | REG_USE01 | SETS_CCODES,
- "cmp", "r!0d, r!1D", 1),
+ "cmp", "r!0d, r!1d", 1),
ENCODING_MAP(THUMB_CMP_HL, 0x4580,
BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | REG_USE01 | SETS_CCODES,
- "cmp", "r!0D, r!1d", 1),
+ "cmp", "r!0d, r!1d", 1),
ENCODING_MAP(THUMB_CMP_HH, 0x45c0,
BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | REG_USE01 | SETS_CCODES,
- "cmp", "r!0D, r!1D", 1),
+ "cmp", "r!0d, r!1d", 1),
ENCODING_MAP(THUMB_EOR_RR, 0x4040,
BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
"eors", "r!0d, r!1d", 1),
ENCODING_MAP(THUMB_LDMIA, 0xc800,
BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_DEF_LIST1,
+ IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1,
"ldmia", "r!0d!!, <!1R>", 1),
ENCODING_MAP(THUMB_LDR_RRI5, 0x6800,
BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, UNUSED, -1, -1,
@@ -270,15 +269,15 @@ ArmEncodingMap EncodingMap[ARM_LAST] = {
ENCODING_MAP(THUMB_MOV_RR_H2H, 0x46c0,
BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | REG_DEF0_USE1,
- "mov", "r!0D, r!1D", 1),
+ "mov", "r!0d, r!1d", 1),
ENCODING_MAP(THUMB_MOV_RR_H2L, 0x4640,
BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | REG_DEF0_USE1,
- "mov", "r!0d, r!1D", 1),
+ "mov", "r!0d, r!1d", 1),
ENCODING_MAP(THUMB_MOV_RR_L2H, 0x4680,
BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | REG_DEF0_USE1,
- "mov", "r!0D, r!1d", 1),
+ "mov", "r!0d, r!1d", 1),
ENCODING_MAP(THUMB_MUL, 0x4340,
BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, UNUSED, -1, -1,
IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
@@ -349,7 +348,7 @@ ArmEncodingMap EncodingMap[ARM_LAST] = {
"subs", "r!0d, r!1d, #!2d]", 1),
ENCODING_MAP(THUMB_SUB_RI8, 0x3800,
BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP | REG_DEF0 | REG_USE0 | SETS_CCODES,
+ IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
"subs", "r!0d, #!1d", 1),
ENCODING_MAP(THUMB_SUB_RRR, 0x1a00,
BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, UNUSED, -1, -1,
@@ -471,13 +470,13 @@ ArmEncodingMap EncodingMap[ARM_LAST] = {
BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0, UNUSED, -1, -1,
IS_TERTIARY_OP | REG_DEF0_USE1,
"ldr", "r!0d,[r!1d, #-!2d]", 2),
- ENCODING_MAP(THUMB2_CBNZ, 0xb900,
+ ENCODING_MAP(THUMB2_CBNZ, 0xb900, /* Note: does not affect flags */
BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP, /* Note: does not affect flags */
+ IS_BINARY_OP | REG_USE0 | IS_BRANCH,
"cbnz", "r!0d,!1t", 1),
- ENCODING_MAP(THUMB2_CBZ, 0xb100,
+ ENCODING_MAP(THUMB2_CBZ, 0xb100, /* Note: does not affect flags */
BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP, /* Note: does not affect flags */
+ IS_BINARY_OP | REG_USE0 | IS_BRANCH,
"cbz", "r!0d,!1t", 1),
ENCODING_MAP(THUMB2_ADD_RRI12, 0xf2000000,
BITBLT, 11, 8, BITBLT, 19, 16, IMM12, -1, -1, UNUSED, -1, -1,
@@ -497,11 +496,11 @@ ArmEncodingMap EncodingMap[ARM_LAST] = {
"vmov.f64 ", " !0S, !1S", 2),
ENCODING_MAP(THUMB2_LDMIA, 0xe8900000,
BITBLT, 19, 16, BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_DEF_LIST1,
+ IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1,
"ldmia", "r!0d!!, <!1R>", 2),
ENCODING_MAP(THUMB2_STMIA, 0xe8800000,
BITBLT, 19, 16, BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1,
- IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1,
+ IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1,
"stmia", "r!0d!!, <!1R>", 2),
ENCODING_MAP(THUMB2_ADD_RRR, 0xeb100000, /* setflags encoding */
BITBLT, 11, 8, BITBLT, 19, 16, BITBLT, 3, 0, SHIFT, -1, -1,
@@ -758,7 +757,7 @@ ArmEncodingMap EncodingMap[ARM_LAST] = {
"fmsr", "!0s, r!1d", 2),
ENCODING_MAP(THUMB2_FMRRD, 0xec500b10,
BITBLT, 15, 12, BITBLT, 19, 16, DFP, 5, 0, UNUSED, -1, -1,
- IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE2,
+ IS_TERTIARY_OP | REG_DEF01_USE2,
"fmrrd", "r!0d, r!1d, !2S", 2),
ENCODING_MAP(THUMB2_FMDRR, 0xec400b10,
DFP, 5, 0, BITBLT, 15, 12, BITBLT, 19, 16, UNUSED, -1, -1,
@@ -766,7 +765,6 @@ ArmEncodingMap EncodingMap[ARM_LAST] = {
"fmdrr", "!0S, r!1d, r!2d", 2),
};
-
/*
* The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is
* not ready. Since r5 (rFP) is not updated often, it is less likely to
diff --git a/vm/compiler/codegen/arm/Codegen.c b/vm/compiler/codegen/arm/Codegen.c
index 30dc508e5..65792bea8 100644
--- a/vm/compiler/codegen/arm/Codegen.c
+++ b/vm/compiler/codegen/arm/Codegen.c
@@ -122,9 +122,10 @@ static void selfVerificationLoad(InterpState* interpState)
if (heapSpacePtr == shadowSpace->heapSpaceTail)
data = *((unsigned int*) addr);
- //LOGD("*** HEAP LOAD: Addr: 0x%x Data: 0x%x", addr, data);
-
int reg = (heapArgSpace->regMap >> 4) & 0xF;
+
+ //LOGD("*** HEAP LOAD: Reg:%d Addr: 0x%x Data: 0x%x", reg, addr, data);
+
selfVerificationLoadDecodeData(heapArgSpace, data, reg);
}
@@ -483,6 +484,48 @@ static void selfVerificationMemOpWrapper(CompilationUnit *cUnit, int regMap,
#endif
/*
+ * Mark load/store instructions that access Dalvik registers through rFP +
+ * offset.
+ */
+static void annotateDalvikRegAccess(ArmLIR *lir, int regId, bool isLoad)
+{
+ if (isLoad) {
+ lir->useMask |= ENCODE_DALVIK_REG;
+ } else {
+ lir->defMask |= ENCODE_DALVIK_REG;
+ }
+
+ /*
+ * Store the Dalvik register id in aliasInfo. Mark the MSB if it is a 64-bit
+ * access.
+ */
+ lir->aliasInfo = regId;
+ if (DOUBLEREG(lir->operands[0])) {
+ lir->aliasInfo |= 0x80000000;
+ }
+}
+
+/*
+ * Decode the register id and mark the corresponding bit(s).
+ */
+static inline void setupRegMask(u8 *mask, int reg)
+{
+ u8 seed;
+ int shift;
+ int regId = reg & 0x1f;
+
+ /*
+ * Each double register is equal to a pair of single-precision FP registers
+ */
+ seed = DOUBLEREG(reg) ? 3 : 1;
+ /* FP register starts at bit position 16 */
+ shift = FPREG(reg) ? kFPReg0 : 0;
+ /* Expand the double register id into single offset */
+ shift += regId;
+ *mask |= seed << shift;
+}
+
+/*
* Set up the proper fields in the resource mask
*/
static void setupResourceMasks(ArmLIR *lir)
@@ -500,20 +543,25 @@ static void setupResourceMasks(ArmLIR *lir)
/* Set up the mask for resources that are updated */
if (flags & IS_BRANCH) {
lir->defMask |= ENCODE_REG_PC;
+ lir->useMask |= ENCODE_REG_PC;
}
if (flags & REG_DEF0) {
- lir->defMask |= ENCODE_GP_REG(lir->operands[0]);
+ setupRegMask(&lir->defMask, lir->operands[0]);
}
if (flags & REG_DEF1) {
- lir->defMask |= ENCODE_GP_REG(lir->operands[1]);
+ setupRegMask(&lir->defMask, lir->operands[1]);
}
if (flags & REG_DEF_SP) {
lir->defMask |= ENCODE_REG_SP;
}
+ if (flags & REG_DEF_LR) {
+ lir->defMask |= ENCODE_REG_LR;
+ }
+
if (flags & REG_DEF_LIST0) {
lir->defMask |= ENCODE_REG_LIST(lir->operands[0]);
}
@@ -528,7 +576,7 @@ static void setupResourceMasks(ArmLIR *lir)
/* Conservatively treat the IT block */
if (flags & IS_IT) {
- lir->defMask = -1;
+ lir->defMask = ENCODE_ALL;
}
/* Set up the mask for resources that are used */
@@ -541,7 +589,7 @@ static void setupResourceMasks(ArmLIR *lir)
for (i = 0; i < 3; i++) {
if (flags & (1 << (kRegUse0 + i))) {
- lir->useMask |= ENCODE_GP_REG(lir->operands[i]);
+ setupRegMask(&lir->useMask, lir->operands[i]);
}
}
}
@@ -698,6 +746,17 @@ static ArmLIR *scanLiteralPool(CompilationUnit *cUnit, int value,
return NULL;
}
+/*
+ * Generate an ARM_PSEUDO_BARRIER marker to indicate the boundary of special
+ * blocks.
+ */
+static void genBarrier(CompilationUnit *cUnit)
+{
+ ArmLIR *barrier = newLIR0(cUnit, ARM_PSEUDO_BARRIER);
+ /* Mark all resources as being clobbered */
+ barrier->defMask = -1;
+}
+
/* Perform the actual operation for OP_RETURN_* */
static void genReturnCommon(CompilationUnit *cUnit, MIR *mir)
{
@@ -1603,7 +1662,13 @@ static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir,
opRegRegImm(cUnit, OP_ADD, r4PC, rFP, srcOffset, rNone);
/* load [r0 .. min(numArgs,4)] */
regMask = (1 << ((numArgs < 4) ? numArgs : 4)) - 1;
+ /*
+ * Protect the loadMultiple instruction from being reordered with other
+ * Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
loadMultiple(cUnit, r4PC, regMask);
+ genBarrier(cUnit);
opRegRegImm(cUnit, OP_SUB, r7, rFP,
sizeof(StackSaveArea) + (numArgs << 2), rNone);
@@ -1627,9 +1692,16 @@ static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir,
if (numArgs > 11) {
loadConstant(cUnit, 5, ((numArgs - 4) >> 2) << 2);
loopLabel = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
+ loopLabel->defMask = ENCODE_ALL;
}
storeMultiple(cUnit, r7, regMask);
+ /*
+ * Protect the loadMultiple instruction from being reordered with other
+ * Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
loadMultiple(cUnit, r4PC, regMask);
+ genBarrier(cUnit);
/* No need to generate the loop structure if numArgs <= 11 */
if (numArgs > 11) {
opRegImm(cUnit, OP_SUB, rFP, 4, rNone);
@@ -1643,7 +1715,13 @@ static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir,
/* Generate the loop epilogue - don't use r0 */
if ((numArgs > 4) && (numArgs % 4)) {
regMask = ((1 << (numArgs & 0x3)) - 1) << 1;
+ /*
+ * Protect the loadMultiple instruction from being reordered with other
+ * Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
loadMultiple(cUnit, r4PC, regMask);
+ genBarrier(cUnit);
}
if (numArgs >= 8)
opImm(cUnit, OP_POP, (1 << r0 | 1 << rFP));
@@ -1960,7 +2038,13 @@ static ArmLIR *loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo,
} else {
assert(rDestLo < rDestHi);
res = loadValueAddress(cUnit, vSrc, rDestLo);
+ /*
+ * Protect the loadMultiple instruction from being reordered with other
+ * Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
loadMultiple(cUnit, rDestLo, (1<<rDestLo) | (1<<rDestHi));
+ genBarrier(cUnit);
}
return res;
}
@@ -1984,7 +2068,13 @@ static ArmLIR *storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi,
} else {
assert(rSrcLo < rSrcHi);
res = loadValueAddress(cUnit, vDest, rScratch);
+ /*
+ * Protect the storeMultiple instruction from being reordered with
+ * other Dalvik stack accesses.
+ */
+ genBarrier(cUnit);
storeMultiple(cUnit, rScratch, (1<<rSrcLo) | (1 << rSrcHi));
+ genBarrier(cUnit);
}
return res;
}
@@ -2262,6 +2352,7 @@ static bool handleFmt21c_Fmt31c(CompilationUnit *cUnit, MIR *mir)
genZeroCheck(cUnit, r0, mir->offset, NULL);
/* check cast passed - branch target here */
ArmLIR *target = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
+ target->defMask = ENCODE_ALL;
branch1->generic.target = (LIR *)target;
branch2->generic.target = (LIR *)target;
break;
@@ -2738,6 +2829,7 @@ static bool handleFmt22c(CompilationUnit *cUnit, MIR *mir)
opReg(cUnit, OP_BLX, r4PC);
/* branch target here */
ArmLIR *target = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
+ target->defMask = ENCODE_ALL;
storeValue(cUnit, r0, mir->dalvikInsn.vA, r1);
branch1->generic.target = (LIR *)target;
branch2->generic.target = (LIR *)target;
@@ -3888,6 +3980,8 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
/* Remember the first LIR for this block */
if (headLIR == NULL) {
headLIR = boundaryLIR;
+ /* Set the first boundaryLIR as a scheduling barrier */
+ headLIR->defMask = ENCODE_ALL;
}
bool notHandled;
@@ -4155,3 +4249,19 @@ void dvmCompilerArchDump(void)
LOGD("dalvik.vm.jit.op = %s", buf);
}
}
+
+/* Common initialization routine for an architecture family */
+bool dvmCompilerArchInit()
+{
+ int i;
+
+ for (i = 0; i < ARM_LAST; i++) {
+ if (EncodingMap[i].opCode != i) {
+ LOGE("Encoding order for %s is wrong: expecting %d, seeing %d",
+ EncodingMap[i].name, i, EncodingMap[i].opCode);
+ dvmAbort();
+ }
+ }
+
+ return compilerArchVariantInit();
+}
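setupRegMask() above folds core and VFP registers into one resource vector: core registers occupy bits 0-15, single-precision VFP registers start at kFPReg0 (bit 16), and a double-precision register seeds two adjacent bits so a touch of either half shows up as a dependence. A rough standalone restatement (helper names invented, not from the patch):

    typedef unsigned long long u8mask;

    #define SKETCH_FP_BASE 16            /* kFPReg0 in ArmLIR.h */

    /* Mark one single-precision register sN in a resource mask. */
    static void markSingle(u8mask *mask, int sIndex)
    {
        *mask |= 1ULL << (SKETCH_FP_BASE + sIndex);
    }

    /* A double-precision register dN aliases s(2N) and s(2N+1), so seed
     * two adjacent bits -- the "seed = 3" case in setupRegMask(). */
    static void markDouble(u8mask *mask, int dIndex)
    {
        *mask |= 3ULL << (SKETCH_FP_BASE + 2 * dIndex);
    }

For example, markDouble(&defMask, 2) sets the bits for s4 and s5, so a later read of either half is treated as dependent on the double-precision write.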
diff --git a/vm/compiler/codegen/arm/Codegen.h b/vm/compiler/codegen/arm/Codegen.h
index 50a94fdd7..b484cd23d 100644
--- a/vm/compiler/codegen/arm/Codegen.h
+++ b/vm/compiler/codegen/arm/Codegen.h
@@ -22,6 +22,7 @@
* variant code such as ThumbUtilty.c
*/
+static void annotateDalvikRegAccess(ArmLIR *lir, int regId, bool isLoad);
static void setupResourceMasks(ArmLIR *lir);
static ArmLIR *newLIR0(CompilationUnit *cUnit, ArmOpCode opCode);
static ArmLIR *newLIR1(CompilationUnit *cUnit, ArmOpCode opCode,
@@ -38,10 +39,10 @@ static ArmLIR *addWordData(CompilationUnit *cUnit, int value, bool inPlace);
static inline ArmLIR *genCheckCommon(CompilationUnit *cUnit, int dOffset,
ArmLIR *branch,
ArmLIR *pcrLabel);
+static void genBarrier(CompilationUnit *cUnit);
/* Routines which must be supplied by the variant-specific code */
static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode);
-bool dvmCompilerArchInit(void);
static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir);
static bool genInlineCos(CompilationUnit *cUnit, MIR *mir);
static bool genInlineSin(CompilationUnit *cUnit, MIR *mir);
@@ -52,7 +53,6 @@ static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest,
int vSrc1, int vSrc2);
static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1,
int vSrc2);
-
-
+static bool compilerArchVariantInit();
#endif /* _DALVIK_VM_COMPILER_CODEGEN_ARM_CODEGEN_H */
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c
index fbc0e26f1..71fc01410 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.c
+++ b/vm/compiler/codegen/arm/LocalOptimizations.c
@@ -18,49 +18,40 @@
#include "vm/compiler/CompilerInternals.h"
#include "ArmLIR.h"
+#define DEBUG_OPT(X)
+
ArmLIR* dvmCompilerGenCopy(CompilationUnit *cUnit, int rDest, int rSrc);
/* Is this a Dalvik register access? */
static inline bool isDalvikLoad(ArmLIR *lir)
{
- return ((lir->operands[1] == rFP) &&
- ((lir->opCode == THUMB_LDR_RRI5) ||
- (lir->opCode == THUMB2_LDR_RRI12) ||
- (lir->opCode == THUMB2_VLDRS) ||
- (lir->opCode == THUMB2_VLDRD)));
+ return (lir->useMask != ~0ULL) && (lir->useMask & ENCODE_DALVIK_REG);
}
static inline bool isDalvikStore(ArmLIR *lir)
{
- return ((lir->operands[1] == rFP) &&
- ((lir->opCode == THUMB_STR_RRI5) ||
- (lir->opCode == THUMB2_STR_RRI12) ||
- (lir->opCode == THUMB2_VSTRS) ||
- (lir->opCode == THUMB2_VSTRD)));
+ return (lir->defMask != ~0ULL) && (lir->defMask & ENCODE_DALVIK_REG);
}
-/* Double regs overlap float regs. Return true if collision */
-static bool regClobber(int reg1, int reg2)
+static inline bool isDalvikRegisterPartiallyClobbered(ArmLIR *lir1,
+ ArmLIR *lir2)
{
- int reg1a, reg1b;
- int reg2a, reg2b;
- if (!FPREG(reg1) || !FPREG(reg2))
- return (reg1 == reg2);
- if (DOUBLEREG(reg1)) {
- reg1a = reg1 & FP_REG_MASK;
- reg1b = reg1a + 1;
- } else {
- reg1a = reg1b = reg1 & FP_REG_MASK;
- }
- if (DOUBLEREG(reg2)) {
- reg2a = reg2 & FP_REG_MASK;
- reg2b = reg2a + 1;
- } else {
- reg2a = reg2b = reg2 & FP_REG_MASK;
- }
- return (reg1a == reg2a) || (reg1a == reg2b) ||
- (reg1b == reg2a) || (reg1b == reg2b);
+ int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo);
+ int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo);
+ int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo);
+ int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo);
+
+ return (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
+}
+
+static void dumpDependentInsnPair(ArmLIR *thisLIR, ArmLIR *checkLIR,
+ const char *optimization)
+{
+ LOGD("************ %s ************", optimization);
+ dvmDumpLIRInsn((LIR *) thisLIR, 0);
+ dvmDumpLIRInsn((LIR *) checkLIR, 0);
}
+
/*
* Perform a pass of top-down walk to
* 1) Eliminate redundant loads and stores
@@ -81,15 +72,18 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit,
continue;
}
if (isDalvikStore(thisLIR)) {
- int dRegId = thisLIR->operands[2];
+ int dRegId = DECODE_ALIAS_INFO_REG(thisLIR->aliasInfo);
+ int dRegIdHi = dRegId + DECODE_ALIAS_INFO_WIDE(thisLIR->aliasInfo);
int nativeRegId = thisLIR->operands[0];
ArmLIR *checkLIR;
int sinkDistance = 0;
/*
* Add r15 (pc) to the mask to prevent this instruction
- * from sinking past branch instructions.
+ * from sinking past branch instructions. Unset the Dalvik register
+ * bit when checking with native resource constraints.
*/
- u8 stopMask = ENCODE_GP_REG(rpc) | thisLIR->useMask;
+ u8 stopMask = (ENCODE_REG_PC | thisLIR->useMask) &
+ ~ENCODE_DALVIK_REG;
for (checkLIR = NEXT_LIR(thisLIR);
checkLIR != tailLIR;
@@ -97,10 +91,8 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit,
/* Check if a Dalvik register load is redundant */
if (isDalvikLoad(checkLIR) &&
- checkLIR->operands[2] == dRegId ) {
- if (FPREG(nativeRegId) != FPREG(checkLIR->operands[0])) {
- break; // TODO: handle gen<=>float copies
- }
+ (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
+ (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) {
/* Insert a move to replace the load */
if (checkLIR->operands[0] != nativeRegId) {
ArmLIR *moveLIR;
@@ -117,39 +109,34 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit,
checkLIR->isNop = true;
continue;
- /* Found a true output dependency - nuke the previous store */
+ /*
+ * Found a true output dependency - nuke the previous store.
+ * The register type doesn't matter here.
+ */
} else if (isDalvikStore(checkLIR) &&
- checkLIR->operands[2] == dRegId) {
+ (checkLIR->aliasInfo == thisLIR->aliasInfo)) {
thisLIR->isNop = true;
break;
/* Find out the latest slot that the store can be sunk into */
} else {
- bool stopHere = false;
-
/* Last instruction reached */
- stopHere |= NEXT_LIR(checkLIR) == tailLIR;
-
- /*
- * Conservatively assume there is a memory dependency
- * for st/ld multiples and reg+reg address mode
- */
- stopHere |= checkLIR->opCode == THUMB_STMIA ||
- checkLIR->opCode == THUMB_LDMIA ||
- checkLIR->opCode == THUMB_STR_RRR ||
- checkLIR->opCode == THUMB_LDR_RRR ||
- checkLIR->opCode == THUMB2_STR_RRR ||
- checkLIR->opCode == THUMB2_LDR_RRR ||
- checkLIR->opCode == THUMB2_STMIA ||
- checkLIR->opCode == THUMB2_LDMIA ||
- checkLIR->opCode == THUMB2_VLDRD ||
- checkLIR->opCode == THUMB2_VSTRD;
-
+ bool stopHere = (NEXT_LIR(checkLIR) == tailLIR);
/* Store data is clobbered */
- stopHere |= (stopMask & checkLIR->defMask) != 0;
+ stopHere |= ((stopMask & checkLIR->defMask) != 0);
+
+ /* Store data partially clobbers the Dalvik register */
+ if (stopHere == false &&
+ ((checkLIR->useMask | checkLIR->defMask) &
+ ENCODE_DALVIK_REG)) {
+ stopHere = isDalvikRegisterPartiallyClobbered(thisLIR,
+ checkLIR);
+ }
/* Found a new place to put the store - move it here */
if (stopHere == true) {
+ DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
+ "SINK STORE"));
/* The store can be sunk for at least one cycle */
if (sinkDistance != 0) {
ArmLIR *newStoreLIR =
@@ -195,25 +182,73 @@ static void applyLoadHoisting(CompilationUnit *cUnit,
continue;
}
if (isDalvikLoad(thisLIR)) {
- int dRegId = thisLIR->operands[2];
+ int dRegId = DECODE_ALIAS_INFO_REG(thisLIR->aliasInfo);
+ int dRegIdHi = dRegId + DECODE_ALIAS_INFO_WIDE(thisLIR->aliasInfo);
int nativeRegId = thisLIR->operands[0];
ArmLIR *checkLIR;
int hoistDistance = 0;
- u8 stopUseMask = ENCODE_GP_REG(rpc) | thisLIR->useMask;
- u8 stopDefMask = thisLIR->defMask;
+ u8 stopUseMask = (ENCODE_REG_PC | thisLIR->useMask) &
+ ~ENCODE_DALVIK_REG;
+ u8 stopDefMask = thisLIR->defMask & ~ENCODE_DALVIK_REG;
+ /* First check if the load can be completely eliminated */
for (checkLIR = PREV_LIR(thisLIR);
checkLIR != headLIR;
checkLIR = PREV_LIR(checkLIR)) {
if (checkLIR->isNop) continue;
+ /*
+ * Check if the Dalvik register is previously accessed
+ * with exactly the same type.
+ */
+ if ((isDalvikLoad(checkLIR) || isDalvikStore(checkLIR)) &&
+ (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
+ (checkLIR->operands[0] == nativeRegId)) {
+ /*
+ * If it is previously accessed but with a different type,
+ * the search will terminate later at the point checking
+ * for partially overlapping stores.
+ */
+ thisLIR->isNop = true;
+ break;
+ }
+
+ /*
+ * No earlier use/def can reach this load if:
+ * 1) Head instruction is reached
+ * 2) load target register is clobbered
+ * 3) A branch is seen (stopUseMask has the PC bit set).
+ */
+ if ((checkLIR == headLIR) ||
+ (stopUseMask | stopDefMask) & checkLIR->defMask) {
+ break;
+ }
+
+ /* Store data partially clobbers the Dalvik register */
+ if (isDalvikStore(checkLIR) &&
+ isDalvikRegisterPartiallyClobbered(thisLIR, checkLIR)) {
+ break;
+ }
+ }
+
+ /* The load has been eliminated */
+ if (thisLIR->isNop) continue;
+
+ /*
+ * The load cannot be eliminated. See if it can be hoisted to an
+ * earlier spot.
+ */
+ for (checkLIR = PREV_LIR(thisLIR);
+ /* empty by intention */;
+ checkLIR = PREV_LIR(checkLIR)) {
+
+ if (checkLIR->isNop) continue;
+
/* Check if the current load is redundant */
if ((isDalvikLoad(checkLIR) || isDalvikStore(checkLIR)) &&
- checkLIR->operands[2] == dRegId ) {
- if (FPREG(nativeRegId) != FPREG(checkLIR->operands[0])) {
- break; // TODO: handle gen<=>float copies
- }
+ (checkLIR->aliasInfo == thisLIR->aliasInfo) &&
+ (REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId))) {
/* Insert a move to replace the load */
if (checkLIR->operands[0] != nativeRegId) {
ArmLIR *moveLIR;
@@ -226,41 +261,54 @@ static void applyLoadHoisting(CompilationUnit *cUnit,
dvmCompilerInsertLIRAfter((LIR *) checkLIR,
(LIR *) moveLIR);
}
- cUnit->printMe = true;
thisLIR->isNop = true;
break;
/* Find out if the load can be yanked past the checkLIR */
} else {
- bool stopHere = false;
-
/* Last instruction reached */
- stopHere |= PREV_LIR(checkLIR) == headLIR;
-
- /*
- * Conservatively assume there is a memory dependency
- * for st/ld multiples and reg+reg address mode
- */
- stopHere |= checkLIR->opCode == THUMB_STMIA ||
- checkLIR->opCode == THUMB_LDMIA ||
- checkLIR->opCode == THUMB_STR_RRR ||
- checkLIR->opCode == THUMB_LDR_RRR ||
- checkLIR->opCode == THUMB2_STR_RRR ||
- checkLIR->opCode == THUMB2_LDR_RRR ||
- checkLIR->opCode == THUMB2_STMIA ||
- checkLIR->opCode == THUMB2_LDMIA ||
- checkLIR->opCode == THUMB2_VLDRD ||
- checkLIR->opCode == THUMB2_VSTRD;
+ bool stopHere = (checkLIR == headLIR);
/* Base address is clobbered by checkLIR */
- stopHere |= (stopUseMask & checkLIR->defMask) != 0;
+ stopHere |= ((stopUseMask & checkLIR->defMask) != 0);
/* Load target clobbers use/def in checkLIR */
- stopHere |= (stopDefMask &
- (checkLIR->useMask | checkLIR->defMask)) != 0;
+ stopHere |= ((stopDefMask &
+ (checkLIR->useMask | checkLIR->defMask)) != 0);
+
+ /* Store data partially clobbers the Dalvik register */
+ if (stopHere == false &&
+ (checkLIR->defMask & ENCODE_DALVIK_REG)) {
+ stopHere = isDalvikRegisterPartiallyClobbered(thisLIR,
+ checkLIR);
+ }
+
+ /*
+ * Stop at an earlier Dalvik load if its offset is not greater
+ * than that of thisLIR
+ *
+ * Experiments show that doing
+ *
+ * ldr r1, [r5, #16]
+ * ldr r0, [r5, #20]
+ *
+ * is much faster than
+ *
+ * ldr r0, [r5, #20]
+ * ldr r1, [r5, #16]
+ */
+ if (isDalvikLoad(checkLIR)) {
+ int dRegId2 =
+ DECODE_ALIAS_INFO_REG(checkLIR->aliasInfo);
+ if (dRegId2 <= dRegId) {
+ stopHere = true;
+ }
+ }
/* Found a new place to put the load - move it here */
if (stopHere == true) {
+ DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
+ "HOIST LOAD"));
/* The store can be hoisted for at least one cycle */
if (hoistDistance != 0) {
ArmLIR *newLoadLIR =
@@ -274,13 +322,13 @@ static void applyLoadHoisting(CompilationUnit *cUnit,
dvmCompilerInsertLIRAfter((LIR *) checkLIR,
(LIR *) newLoadLIR);
thisLIR->isNop = true;
- cUnit->printMe = true;
}
break;
}
/*
- * Saw a real instruction that the store can be sunk after
+ * Saw a real instruction past which hoisting the load is
+ * beneficial
*/
if (!isPseudoOpCode(checkLIR->opCode)) {
hoistDistance++;
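With the opcode lists gone, both passes above key entirely off aliasInfo: identical aliasInfo (plus a matching register type) means the access is redundant, while the partial-overlap test catches a wide access touching only half of an earlier one. A minimal restatement of that overlap check, with a worked example (function name invented):

    /* Restates isDalvikRegisterPartiallyClobbered(): lo is the first Dalvik
     * register an access touches, wide is 1 for a 64-bit access, else 0. */
    static int partiallyOverlaps(int reg1Lo, int reg1Wide,
                                 int reg2Lo, int reg2Wide)
    {
        int reg1Hi = reg1Lo + reg1Wide;
        int reg2Hi = reg2Lo + reg2Wide;

        /* Exact matches are handled by the redundancy checks; only the
         * half-overlap cases force the store/load to stay put. */
        return (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
    }

For instance, a wide store covering v4/v5 (lo 4, wide 1) and a later single-word load of v5 (lo 5, wide 0) hit the reg1Hi == reg2Lo case, so the store cannot be sunk past that load.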
diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c
index b9d1a239e..b40656de3 100644
--- a/vm/compiler/codegen/arm/Thumb2Util.c
+++ b/vm/compiler/codegen/arm/Thumb2Util.c
@@ -107,11 +107,16 @@ static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir);
static inline void resetRegisterScoreboard(CompilationUnit *cUnit)
{
RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+ int i;
dvmClearAllBits(registerScoreboard->nullCheckedRegs);
registerScoreboard->liveDalvikReg = vNone;
registerScoreboard->nativeReg = vNone;
registerScoreboard->nativeRegHi = vNone;
+ for (i = 0; i < 32; i++) {
+ registerScoreboard->fp[i] = vNone;
+ }
+ registerScoreboard->nextFP = 0;
}
/* Kill the corresponding bit in the null-checked register list */
@@ -168,17 +173,6 @@ static inline int selectFirstRegister(CompilationUnit *cUnit, int vSrc,
} else {
return (registerScoreboard->nativeReg + 1) & 3;
}
-
-}
-
-/*
- * Generate a ARM_PSEUDO_IT_BOTTOM marker to indicate the end of an IT block
- */
-static void genITBottom(CompilationUnit *cUnit)
-{
- ArmLIR *itBottom = newLIR0(cUnit, ARM_PSEUDO_IT_BOTTOM);
- /* Mark all resources as being clobbered */
- itBottom->defMask = -1;
}
/*
@@ -457,42 +451,116 @@ static ArmLIR *fpVarAccess(CompilationUnit *cUnit, int vSrcDest,
{
ArmLIR *res;
if (vSrcDest > 255) {
- res = opRegRegImm(cUnit, OP_ADD, r7, rFP, vSrcDest * 4, rNone);
- newLIR3(cUnit, opCode, rSrcDest, r7, 0);
+ opRegRegImm(cUnit, OP_ADD, r7, rFP, vSrcDest * 4, rNone);
+ res = newLIR3(cUnit, opCode, rSrcDest, r7, 0);
} else {
res = newLIR3(cUnit, opCode, rSrcDest, rFP, vSrcDest);
}
return res;
}
+
+static int nextFPReg(CompilationUnit *cUnit, int dalvikReg, bool isDouble)
+{
+ RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+ int reg;
+
+ if (isDouble) {
+ reg = ((registerScoreboard->nextFP + 1) & ~1) % 32;
+ registerScoreboard->nextFP = reg + 2;
+ registerScoreboard->nextFP %= 32;
+ registerScoreboard->fp[reg] = dalvikReg;
+ return dr0 + reg;
+ }
+ else {
+ reg = registerScoreboard->nextFP++;
+ registerScoreboard->nextFP %= 32;
+ registerScoreboard->fp[reg] = dalvikReg;
+ return fr0 + reg;
+ }
+}
+
+/*
+ * Select a SFP register for the dalvikReg
+ */
+static int selectSFPReg(CompilationUnit *cUnit, int dalvikReg)
+{
+ RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+ int i;
+
+ if (dalvikReg == vNone) {
+ return nextFPReg(cUnit, dalvikReg, false);
+ }
+
+ for (i = 0; i < 32; i++) {
+ if (registerScoreboard->fp[i] == dalvikReg) {
+ return fr0 + i;
+ }
+ }
+ return nextFPReg(cUnit, dalvikReg, false);
+}
+
+/*
+ * Select a DFP register for the dalvikReg
+ */
+static int selectDFPReg(CompilationUnit *cUnit, int dalvikReg)
+{
+ RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+ int i;
+
+ if (dalvikReg == vNone) {
+ return nextFPReg(cUnit, dalvikReg, true);
+ }
+
+ for (i = 0; i < 32; i += 2) {
+ if (registerScoreboard->fp[i] == dalvikReg) {
+ return dr0 + i;
+ }
+ }
+ return nextFPReg(cUnit, dalvikReg, true);
+}
+
static ArmLIR *loadFloat(CompilationUnit *cUnit, int vSrc, int rDest)
{
assert(SINGLEREG(rDest));
- return fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRS);
+ ArmLIR *lir = fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRS);
+ annotateDalvikRegAccess(lir, vSrc, true /* isLoad */);
+ return lir;
}
/* Store a float to a Dalvik register */
-static ArmLIR *storeFloat(CompilationUnit *cUnit, int rSrc, int vDest,
- int rScratch)
+static ArmLIR *storeFloat(CompilationUnit *cUnit, int rSrc, int vDest)
{
+ RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+
assert(SINGLEREG(rSrc));
- return fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRS);
+ registerScoreboard->fp[rSrc % 32] = vDest;
+
+ ArmLIR *lir = fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRS);
+ annotateDalvikRegAccess(lir, vDest, false /* isLoad */);
+ return lir;
}
/* Load a double from a Dalvik register */
static ArmLIR *loadDouble(CompilationUnit *cUnit, int vSrc, int rDest)
{
assert(DOUBLEREG(rDest));
- return fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRD);
+ ArmLIR *lir = fpVarAccess(cUnit, vSrc, rDest, THUMB2_VLDRD);
+ annotateDalvikRegAccess(lir, vSrc, true /* isLoad */);
+ return lir;
}
/* Store a double to a Dalvik register */
-static ArmLIR *storeDouble(CompilationUnit *cUnit, int rSrc, int vDest,
- int rScratch)
+static ArmLIR *storeDouble(CompilationUnit *cUnit, int rSrc, int vDest)
{
+ RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard;
+
assert(DOUBLEREG(rSrc));
- return fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRD);
-}
+ registerScoreboard->fp[rSrc % 32] = vDest;
+ ArmLIR *lir = fpVarAccess(cUnit, vDest, rSrc, THUMB2_VSTRD);
+ annotateDalvikRegAccess(lir, vDest, false /* isLoad */);
+ return lir;
+}
/*
* Load value from base + displacement. Optionally perform null check
@@ -507,28 +575,30 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
bool nullCheck, int vReg)
{
ArmLIR *first = NULL;
- ArmLIR *res;
+ ArmLIR *res, *load;
ArmOpCode opCode = THUMB_BKPT;
bool shortForm = false;
bool thumb2Form = (displacement < 4092 && displacement >= 0);
int shortMax = 128;
bool allLowRegs = (LOWREG(rBase) && LOWREG(rDest));
+ int encodedDisp = displacement;
+
switch (size) {
case WORD:
if (LOWREG(rDest) && (rBase == rpc) &&
(displacement <= 1020) && (displacement >= 0)) {
shortForm = true;
- displacement >>= 2;
+ encodedDisp >>= 2;
opCode = THUMB_LDR_PC_REL;
} else if (LOWREG(rDest) && (rBase == r13) &&
(displacement <= 1020) && (displacement >= 0)) {
shortForm = true;
- displacement >>= 2;
+ encodedDisp >>= 2;
opCode = THUMB_LDR_SP_REL;
} else if (allLowRegs && displacement < 128 && displacement >= 0) {
assert((displacement & 0x3) == 0);
shortForm = true;
- displacement >>= 2;
+ encodedDisp >>= 2;
opCode = THUMB_LDR_RRI5;
} else if (thumb2Form) {
shortForm = true;
@@ -539,7 +609,7 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
if (allLowRegs && displacement < 64 && displacement >= 0) {
assert((displacement & 0x1) == 0);
shortForm = true;
- displacement >>= 1;
+ encodedDisp >>= 1;
opCode = THUMB_LDRH_RRI5;
} else if (displacement < 4092 && displacement >= 0) {
shortForm = true;
@@ -573,11 +643,15 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
if (nullCheck)
first = genNullCheck(cUnit, vReg, rBase, mir->offset, NULL);
if (shortForm) {
- res = newLIR3(cUnit, opCode, rDest, rBase, displacement);
+ load = res = newLIR3(cUnit, opCode, rDest, rBase, encodedDisp);
} else {
assert(rBase != rDest);
- res = loadConstant(cUnit, rDest, displacement);
- loadBaseIndexed(cUnit, rBase, rDest, rDest, 0, size);
+ res = loadConstant(cUnit, rDest, encodedDisp);
+ load = loadBaseIndexed(cUnit, rBase, rDest, rDest, 0, size);
+ }
+
+ if (rBase == rFP) {
+ annotateDalvikRegAccess(load, displacement >> 2, true /* isLoad */);
}
return (first) ? first : res;
}
@@ -586,12 +660,14 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
int displacement, int rSrc, OpSize size,
int rScratch)
{
- ArmLIR *res;
+ ArmLIR *res, *store;
ArmOpCode opCode = THUMB_BKPT;
bool shortForm = false;
bool thumb2Form = (displacement < 4092 && displacement >= 0);
int shortMax = 128;
bool allLowRegs = (LOWREG(rBase) && LOWREG(rSrc));
+ int encodedDisp = displacement;
+
if (rScratch != -1)
allLowRegs &= LOWREG(rScratch);
switch (size) {
@@ -599,7 +675,7 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
if (allLowRegs && displacement < 128 && displacement >= 0) {
assert((displacement & 0x3) == 0);
shortForm = true;
- displacement >>= 2;
+ encodedDisp >>= 2;
opCode = THUMB_STR_RRI5;
} else if (thumb2Form) {
shortForm = true;
@@ -611,7 +687,7 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
if (displacement < 64 && displacement >= 0) {
assert((displacement & 0x1) == 0);
shortForm = true;
- displacement >>= 1;
+ encodedDisp >>= 1;
opCode = THUMB_STRH_RRI5;
} else if (thumb2Form) {
shortForm = true;
@@ -632,11 +708,15 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
assert(0);
}
if (shortForm) {
- res = newLIR3(cUnit, opCode, rSrc, rBase, displacement);
+ store = res = newLIR3(cUnit, opCode, rSrc, rBase, encodedDisp);
} else {
assert(rScratch != -1);
- res = loadConstant(cUnit, rScratch, displacement);
- storeBaseIndexed(cUnit, rBase, rScratch, rSrc, 0, size);
+ res = loadConstant(cUnit, rScratch, encodedDisp);
+ store = storeBaseIndexed(cUnit, rBase, rScratch, rSrc, 0, size);
+ }
+
+ if (rBase == rFP) {
+ annotateDalvikRegAccess(store, displacement >> 2, false /* isLoad */);
}
return res;
}
@@ -1139,7 +1219,7 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir,
branch1->generic.target = (LIR *) genIT(cUnit, ARM_COND_HI, "E");
newLIR2(cUnit, THUMB2_MOV_IMM_SHIFT, r7, modifiedImmediate(-1));
newLIR2(cUnit, THUMB_MOV_IMM, r7, 1);
- genITBottom(cUnit);
+ genBarrier(cUnit);
branch2->generic.target = (LIR *) opRegReg(cUnit, OP_NEG, r7, r7);
branch1->generic.target = (LIR *) storeValue(cUnit, r7, vDest, r4PC);
@@ -1279,7 +1359,7 @@ static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
//TODO: need assertion mechanism to validate IT region size
genIT(cUnit, (isMin) ? ARM_COND_GT : ARM_COND_LT, "");
opRegReg(cUnit, OP_MOV, reg0, reg1);
- genITBottom(cUnit);
+ genBarrier(cUnit);
if (vDest >= 0)
storeValue(cUnit, reg0, vDest, reg1);
else
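The new allocator in Thumb2Util.c keeps a 32-entry scoreboard recording which Dalvik register each s-register currently caches, reuses an existing mapping when selectSFPReg/selectDFPReg find one, and otherwise rotates through the file round-robin, rounding up to an even slot for doubles. A simplified sketch of just that rotation (struct and function names invented; the real nextFPReg() also adds the fr0/dr0 enum bases to the returned index):

    #define NUM_SFP_REGS 32

    struct FpScoreboard {
        int fp[NUM_SFP_REGS];   /* Dalvik register cached in each s-reg */
        int nextFP;             /* next rotation point */
    };

    /* Singles take the next slot in the rotation. */
    static int allocSingle(struct FpScoreboard *sb, int dalvikReg)
    {
        int reg = sb->nextFP;
        sb->nextFP = (sb->nextFP + 1) % NUM_SFP_REGS;
        sb->fp[reg] = dalvikReg;
        return reg;
    }

    /* Doubles round up to an even slot and consume an s-register pair. */
    static int allocDouble(struct FpScoreboard *sb, int dalvikReg)
    {
        int reg = ((sb->nextFP + 1) & ~1) % NUM_SFP_REGS;
        sb->nextFP = (reg + 2) % NUM_SFP_REGS;
        sb->fp[reg] = dalvikReg;
        return reg;
    }

Because the mapping is only updated lazily, genArithOpFloat/genArithOpDouble in the armv7-a variant re-check for false sharing (two different Dalvik registers landing on the same FP register) and grab a fresh slot when it happens.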
diff --git a/vm/compiler/codegen/arm/ThumbUtil.c b/vm/compiler/codegen/arm/ThumbUtil.c
index 1794638fe..49e04b414 100644
--- a/vm/compiler/codegen/arm/ThumbUtil.c
+++ b/vm/compiler/codegen/arm/ThumbUtil.c
@@ -322,26 +322,28 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
bool nullCheck, int vReg)
{
ArmLIR *first = NULL;
- ArmLIR *res;
+ ArmLIR *res, *load;
ArmOpCode opCode = THUMB_BKPT;
bool shortForm = false;
int shortMax = 128;
+ int encodedDisp = displacement;
+
switch (size) {
case WORD:
if (LOWREG(rDest) && (rBase == rpc) &&
(displacement <= 1020) && (displacement >= 0)) {
shortForm = true;
- displacement >>= 2;
+ encodedDisp >>= 2;
opCode = THUMB_LDR_PC_REL;
} else if (LOWREG(rDest) && (rBase == r13) &&
(displacement <= 1020) && (displacement >= 0)) {
shortForm = true;
- displacement >>= 2;
+ encodedDisp >>= 2;
opCode = THUMB_LDR_SP_REL;
} else if (displacement < 128 && displacement >= 0) {
assert((displacement & 0x3) == 0);
shortForm = true;
- displacement >>= 2;
+ encodedDisp >>= 2;
opCode = THUMB_LDR_RRI5;
} else {
opCode = THUMB_LDR_RRR;
@@ -351,7 +353,7 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
if (displacement < 64 && displacement >= 0) {
assert((displacement & 0x1) == 0);
shortForm = true;
- displacement >>= 1;
+ encodedDisp >>= 1;
opCode = THUMB_LDRH_RRI5;
} else {
opCode = THUMB_LDRH_RRR;
@@ -377,12 +379,17 @@ static ArmLIR *loadBaseDisp(CompilationUnit *cUnit, MIR *mir, int rBase,
if (nullCheck)
first = genNullCheck(cUnit, vReg, rBase, mir->offset, NULL);
if (shortForm) {
- res = newLIR3(cUnit, opCode, rDest, rBase, displacement);
+ load = res = newLIR3(cUnit, opCode, rDest, rBase, encodedDisp);
} else {
assert(rBase != rDest);
- res = loadConstant(cUnit, rDest, displacement);
- newLIR3(cUnit, opCode, rDest, rBase, rDest);
+ res = loadConstant(cUnit, rDest, encodedDisp);
+ load = newLIR3(cUnit, opCode, rDest, rBase, rDest);
}
+
+ if (rBase == rFP) {
+ annotateDalvikRegAccess(load, displacement >> 2, true /* isLoad */);
+ }
+
return (first) ? first : res;
}
@@ -390,16 +397,18 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
int displacement, int rSrc, OpSize size,
int rScratch)
{
- ArmLIR *res;
+ ArmLIR *res, *store;
ArmOpCode opCode = THUMB_BKPT;
bool shortForm = false;
int shortMax = 128;
+ int encodedDisp = displacement;
+
switch (size) {
case WORD:
if (displacement < 128 && displacement >= 0) {
assert((displacement & 0x3) == 0);
shortForm = true;
- displacement >>= 2;
+ encodedDisp >>= 2;
opCode = THUMB_STR_RRI5;
} else {
opCode = THUMB_STR_RRR;
@@ -410,7 +419,7 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
if (displacement < 64 && displacement >= 0) {
assert((displacement & 0x1) == 0);
shortForm = true;
- displacement >>= 1;
+ encodedDisp >>= 1;
opCode = THUMB_STRH_RRI5;
} else {
opCode = THUMB_STRH_RRR;
@@ -429,11 +438,15 @@ static ArmLIR *storeBaseDisp(CompilationUnit *cUnit, int rBase,
assert(0);
}
if (shortForm) {
- res = newLIR3(cUnit, opCode, rSrc, rBase, displacement);
+ store = res = newLIR3(cUnit, opCode, rSrc, rBase, encodedDisp);
} else {
assert(rScratch != -1);
- res = loadConstant(cUnit, rScratch, displacement);
- newLIR3(cUnit, opCode, rSrc, rBase, rScratch);
+ res = loadConstant(cUnit, rScratch, encodedDisp);
+ store = newLIR3(cUnit, opCode, rSrc, rBase, rScratch);
+ }
+
+ if (rBase == rFP) {
+ annotateDalvikRegAccess(store, displacement >> 2, false /* isLoad */);
}
return res;
}
diff --git a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
index 41a79de0f..d07d96e4c 100644
--- a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
@@ -74,7 +74,7 @@ static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode)
}
/* Architecture-specific initializations and checks go here */
-bool dvmCompilerArchInit(void)
+static bool compilerArchVariantInit(void)
{
/* First, declare dvmCompiler_TEMPLATE_XXX for each template */
#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
diff --git a/vm/compiler/codegen/arm/armv5te/ArchVariant.c b/vm/compiler/codegen/arm/armv5te/ArchVariant.c
index 4bd354b45..b4a38480a 100644
--- a/vm/compiler/codegen/arm/armv5te/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te/ArchVariant.c
@@ -74,7 +74,7 @@ static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode)
}
/* Architecture-specific initializations and checks go here */
-bool dvmCompilerArchInit(void)
+static bool compilerArchVariantInit(void)
{
/* First, declare dvmCompiler_TEMPLATE_XXX for each template */
#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
index 65e0ec0e8..fba1e3259 100644
--- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
@@ -77,7 +77,7 @@ static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode)
}
/* Architecture-specific initializations and checks go here */
-bool dvmCompilerArchInit(void)
+static bool compilerArchVariantInit(void)
{
/* First, declare dvmCompiler_TEMPLATE_XXX for each template */
#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
@@ -132,11 +132,13 @@ static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir)
newLIR3(cUnit, THUMB2_FMRRD, r0, r1, dr1);
newLIR1(cUnit, THUMB_BLX_R, r2);
newLIR3(cUnit, THUMB2_FMDRR, dr0, r0, r1);
+ ArmLIR *label = newLIR0(cUnit, ARM_PSEUDO_TARGET_LABEL);
+ label->defMask = ENCODE_ALL;
+ branch->generic.target = (LIR *)label;
if (vDest >= 0)
- target = storeDouble(cUnit, dr0, vDest, rNone);
+ storeDouble(cUnit, dr0, vDest);
else
- target = newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2);
- branch->generic.target = (LIR *)target;
+ newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2);
resetRegisterScoreboard(cUnit);
return true;
}
@@ -175,10 +177,21 @@ static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest,
default:
return true;
}
- loadFloat(cUnit, vSrc1, fr2);
- loadFloat(cUnit, vSrc2, fr4);
- newLIR3(cUnit, op, fr0, fr2, fr4);
- storeFloat(cUnit, fr0, vDest, 0);
+ int reg0, reg1, reg2;
+ reg1 = selectSFPReg(cUnit, vSrc1);
+ reg2 = selectSFPReg(cUnit, vSrc2);
+ /*
+ * The register mapping is overly optimistic and lazily updated so we
+ * need to detect false sharing here.
+ */
+ if (reg1 == reg2 && vSrc1 != vSrc2) {
+ reg2 = nextFPReg(cUnit, vSrc2, false /* isDouble */);
+ }
+ loadFloat(cUnit, vSrc1, reg1);
+ loadFloat(cUnit, vSrc2, reg2);
+ reg0 = selectSFPReg(cUnit, vDest);
+ newLIR3(cUnit, op, reg0, reg1, reg2);
+ storeFloat(cUnit, reg0, vDest);
return false;
}
@@ -212,10 +225,19 @@ static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest,
default:
return true;
}
- loadDouble(cUnit, vSrc1, dr1);
- loadDouble(cUnit, vSrc2, dr2);
- newLIR3(cUnit, op, dr0, dr1, dr2);
- storeDouble(cUnit, dr0, vDest, rNone);
+
+ int reg0, reg1, reg2;
+ reg1 = selectDFPReg(cUnit, vSrc1);
+ reg2 = selectDFPReg(cUnit, vSrc2);
+ if (reg1 == reg2 && vSrc1 != vSrc2) {
+ reg2 = nextFPReg(cUnit, vSrc2, true /* isDouble */);
+ }
+ loadDouble(cUnit, vSrc1, reg1);
+ loadDouble(cUnit, vSrc2, reg2);
+ /* Allocate a fresh register for vDest (rename the destination) */
+ reg0 = selectDFPReg(cUnit, vNone);
+ newLIR3(cUnit, op, reg0, reg1, reg2);
+ storeDouble(cUnit, reg0, vDest);
return false;
}
@@ -270,18 +292,20 @@ static bool genConversion(CompilationUnit *cUnit, MIR *mir)
return true;
}
if (longSrc) {
- srcReg = dr1;
+ srcReg = selectDFPReg(cUnit, vSrc2);
loadDouble(cUnit, vSrc2, srcReg);
} else {
- srcReg = fr2;
+ srcReg = selectSFPReg(cUnit, vSrc2);
loadFloat(cUnit, vSrc2, srcReg);
}
if (longDest) {
- newLIR2(cUnit, op, dr0, srcReg);
- storeDouble(cUnit, dr0, vSrc1Dest, rNone);
+ int destReg = selectDFPReg(cUnit, vNone);
+ newLIR2(cUnit, op, destReg, srcReg);
+ storeDouble(cUnit, destReg, vSrc1Dest);
} else {
- newLIR2(cUnit, op, fr0, srcReg);
- storeFloat(cUnit, fr0, vSrc1Dest, 0);
+ int destReg = selectSFPReg(cUnit, vNone);
+ newLIR2(cUnit, op, destReg, srcReg);
+ storeFloat(cUnit, destReg, vSrc1Dest);
}
return false;
}
@@ -292,6 +316,7 @@ static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1,
bool isDouble;
int defaultResult;
bool ltNaNBias;
+ int fpReg1, fpReg2;
switch(mir->dalvikInsn.opCode) {
case OP_CMPL_FLOAT:
@@ -314,17 +339,27 @@ static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1,
return true;
}
if (isDouble) {
- loadDouble(cUnit, vSrc1, dr0);
- loadDouble(cUnit, vSrc2, dr1);
+ fpReg1 = selectDFPReg(cUnit, vSrc1);
+ fpReg2 = selectDFPReg(cUnit, vSrc2);
+ if (fpReg1 == fpReg2 && vSrc1 != vSrc2) {
+ fpReg2 = nextFPReg(cUnit, vSrc2, true /* isDouble */);
+ }
+ loadDouble(cUnit, vSrc1, fpReg1);
+ loadDouble(cUnit, vSrc2, fpReg2);
// Hard-coded use of r7 as temp. Revisit
- loadConstant(cUnit,r7, defaultResult);
- newLIR2(cUnit, THUMB2_VCMPD, dr0, dr1);
+ loadConstant(cUnit, r7, defaultResult);
+ newLIR2(cUnit, THUMB2_VCMPD, fpReg1, fpReg2);
} else {
- loadFloat(cUnit, vSrc1, fr0);
- loadFloat(cUnit, vSrc2, fr2);
+ fpReg1 = selectSFPReg(cUnit, vSrc1);
+ fpReg2 = selectSFPReg(cUnit, vSrc2);
+ if (fpReg1 == fpReg2 && vSrc1 != vSrc2) {
+ fpReg2 = nextFPReg(cUnit, vSrc2, false /* isDouble */);
+ }
+ loadFloat(cUnit, vSrc1, fpReg1);
+ loadFloat(cUnit, vSrc2, fpReg2);
// Hard-coded use of r7 as temp. Revisit
- loadConstant(cUnit,r7, defaultResult);
- newLIR2(cUnit, THUMB2_VCMPS, fr0, fr2);
+ loadConstant(cUnit, r7, defaultResult);
+ newLIR2(cUnit, THUMB2_VCMPS, fpReg1, fpReg2);
}
newLIR0(cUnit, THUMB2_FMSTAT);
genIT(cUnit, (defaultResult == -1) ? ARM_COND_GT : ARM_COND_MI, "");