summary | refs | log | tree | commit | diff | stats
path: root/vm/compiler/codegen/arm/Thumb2
diff options
context:
space:
mode:
author: Bill Buzbee <buzbee@google.com>, 2010-03-02 16:14:41 -0800
committer: Bill Buzbee <buzbee@google.com>, 2010-03-03 15:13:17 -0800
commit: 1f74863d3e0f19930818398f375ebf1cf2d78969 (patch)
tree: 4e646a4e73cae7d5e98c621d1cc1aa330a466cf9 /vm/compiler/codegen/arm/Thumb2
parent: 10ebc7d0b84dcb98e1a7eeac96ef06acdfc8d184 (diff)
download: android_dalvik-1f74863d3e0f19930818398f375ebf1cf2d78969.tar.gz
android_dalvik-1f74863d3e0f19930818398f375ebf1cf2d78969.tar.bz2
android_dalvik-1f74863d3e0f19930818398f375ebf1cf2d78969.zip
Jit: Sapphire tuning - mostly scheduling.
Re-enabled load/store motion that had inadvertently been turned off for non-armv7 targets. Tagged memory references with the kind of memory they touch (Dalvik frame, literal pool, heap) to enable more aggressive load hoisting. Eliminated some largely duplicate code in the target specific files. Reworked temp register allocation code to allocate next temp round-robin (to improve scheduling opportunities). Overall, nice gain for Sapphire. Shows 5% to 15% on some benchmarks, and measurable improvements for Passion.
Diffstat (limited to 'vm/compiler/codegen/arm/Thumb2')
-rw-r--r-- vm/compiler/codegen/arm/Thumb2/Factory.c | 15
-rw-r--r-- vm/compiler/codegen/arm/Thumb2/Gen.c | 81
2 files changed, 15 insertions, 81 deletions
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.c b/vm/compiler/codegen/arm/Thumb2/Factory.c
index 242e665ab..c4d2c2824 100644
--- a/vm/compiler/codegen/arm/Thumb2/Factory.c
+++ b/vm/compiler/codegen/arm/Thumb2/Factory.c
@@ -68,6 +68,10 @@ static ArmLIR *loadFPConstantValue(CompilationUnit *cUnit, int rDest,
loadPcRel->operands[0] = rDest;
loadPcRel->operands[1] = rpc;
setupResourceMasks(loadPcRel);
+ // Self-cosim workaround.
+ if (rDest != rlr)
+ setMemRefType(loadPcRel, true, kLiteral);
+ loadPcRel->aliasInfo = dataTarget->operands[0];
dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel);
return loadPcRel;
}
@@ -168,6 +172,17 @@ static ArmLIR *loadConstantValue(CompilationUnit *cUnit, int rDest, int value)
loadPcRel->generic.target = (LIR *) dataTarget;
loadPcRel->operands[0] = rDest;
setupResourceMasks(loadPcRel);
+ /*
+ * Special case for literal loads with a link register target.
+ * Self-cosim mode will insert calls prior to heap references
+ * after optimization, and those will destroy r14. The easy
+ * workaround is to treat literal loads into r14 as heap references
+ * to prevent them from being hoisted. Use of r14 in this manner
+ * is currently rare. Revisit if that changes.
+ */
+ if (rDest != rlr)
+ setMemRefType(loadPcRel, true, kLiteral);
+ loadPcRel->aliasInfo = dataTarget->operands[0];
res = loadPcRel;
dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel);
diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.c b/vm/compiler/codegen/arm/Thumb2/Gen.c
index 8b2b69653..1a505efe2 100644
--- a/vm/compiler/codegen/arm/Thumb2/Gen.c
+++ b/vm/compiler/codegen/arm/Thumb2/Gen.c
@@ -322,65 +322,6 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir,
branch3->generic.target = branch1->generic.target;
}
-static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir)
-{
- RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0);
- RegLocation rlDest = inlinedTarget(cUnit, mir, false);
- rlObj = loadValue(cUnit, rlObj, kCoreReg);
- RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
- genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, NULL);
- loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count,
- rlResult.lowReg);
- storeValue(cUnit, rlDest, rlResult);
- return false;
-}
-
-static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir)
-{
- int contents = offsetof(ArrayObject, contents);
- RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0);
- RegLocation rlIdx = dvmCompilerGetSrc(cUnit, mir, 1);
- RegLocation rlDest = inlinedTarget(cUnit, mir, false);
- RegLocation rlResult;
- rlObj = loadValue(cUnit, rlObj, kCoreReg);
- rlIdx = loadValue(cUnit, rlIdx, kCoreReg);
- int regMax = dvmCompilerAllocTemp(cUnit);
- int regOff = dvmCompilerAllocTemp(cUnit);
- int regPtr = dvmCompilerAllocTemp(cUnit);
- ArmLIR *pcrLabel = genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg,
- mir->offset, NULL);
- loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, regMax);
- loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_offset, regOff);
- loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_value, regPtr);
- genBoundsCheck(cUnit, rlIdx.lowReg, regMax, mir->offset, pcrLabel);
- dvmCompilerFreeTemp(cUnit, regMax);
- opRegImm(cUnit, kOpAdd, regPtr, contents);
- opRegReg(cUnit, kOpAdd, regOff, rlIdx.lowReg);
- rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
- loadBaseIndexed(cUnit, regPtr, regOff, rlResult.lowReg, 1, kUnsignedHalf);
- storeValue(cUnit, rlDest, rlResult);
- return false;
-}
-
-static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir)
-{
- RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
- rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
- RegLocation rlDest = inlinedTarget(cUnit, mir, false);;
- RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
- int signReg = dvmCompilerAllocTemp(cUnit);
- /*
- * abs(x) = y<=x>>31, (x+y)^y.
- * Thumb2's IT block also yields 3 instructions, but imposes
- * scheduling constraints.
- */
- opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.lowReg, 31);
- opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg);
- opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg);
- storeValue(cUnit, rlDest, rlResult);
- return false;
-}
-
static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
{
RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
@@ -421,28 +362,6 @@ static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
return false;
}
-static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir)
-{
- RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1);
- RegLocation rlDest = inlinedTargetWide(cUnit, mir, false);
- rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg);
- RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
- int signReg = dvmCompilerAllocTemp(cUnit);
- /*
- * abs(x) = y<=x>>31, (x+y)^y.
- * Thumb2 IT block allows slightly shorter sequence,
- * but introduces a scheduling barrier. Stick with this
- * mechanism for now.
- */
- opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.highReg, 31);
- opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg);
- opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc.highReg, signReg);
- opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg);
- opRegReg(cUnit, kOpXor, rlResult.highReg, signReg);
- storeValueWide(cUnit, rlDest, rlResult);
- return false;
-}
-
static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit,
RegLocation rlSrc, RegLocation rlResult, int lit,
int firstBit, int secondBit)