diff options
author | Bill Buzbee <buzbee@google.com> | 2010-03-02 16:14:41 -0800 |
---|---|---|
committer | Bill Buzbee <buzbee@google.com> | 2010-03-03 15:13:17 -0800 |
commit | 1f74863d3e0f19930818398f375ebf1cf2d78969 (patch) | |
tree | 4e646a4e73cae7d5e98c621d1cc1aa330a466cf9 /vm/compiler/codegen/arm/Thumb2 | |
parent | 10ebc7d0b84dcb98e1a7eeac96ef06acdfc8d184 (diff) | |
download | android_dalvik-1f74863d3e0f19930818398f375ebf1cf2d78969.tar.gz android_dalvik-1f74863d3e0f19930818398f375ebf1cf2d78969.tar.bz2 android_dalvik-1f74863d3e0f19930818398f375ebf1cf2d78969.zip |
Jit: Sapphire tuning - mostly scheduling.
Re-enabled load/store motion that had inadvertently been turned off for
non-armv7 targets. Tagged memory references with the kind of memory
they touch (Dalvik frame, literal pool, heap) to enable more aggressive
load hoisting. Eliminated some largely duplicated code in the
target-specific files. Reworked temp register allocation code to allocate the
next temp round-robin (to improve scheduling opportunities).
Overall, a nice gain for Sapphire: shows 5% to 15% improvement on some
benchmarks, and measurable improvements for Passion.
Diffstat (limited to 'vm/compiler/codegen/arm/Thumb2')
-rw-r--r-- | vm/compiler/codegen/arm/Thumb2/Factory.c | 15 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/Thumb2/Gen.c | 81 |
2 files changed, 15 insertions, 81 deletions
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.c b/vm/compiler/codegen/arm/Thumb2/Factory.c index 242e665ab..c4d2c2824 100644 --- a/vm/compiler/codegen/arm/Thumb2/Factory.c +++ b/vm/compiler/codegen/arm/Thumb2/Factory.c @@ -68,6 +68,10 @@ static ArmLIR *loadFPConstantValue(CompilationUnit *cUnit, int rDest, loadPcRel->operands[0] = rDest; loadPcRel->operands[1] = rpc; setupResourceMasks(loadPcRel); + // Self-cosim workaround. + if (rDest != rlr) + setMemRefType(loadPcRel, true, kLiteral); + loadPcRel->aliasInfo = dataTarget->operands[0]; dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); return loadPcRel; } @@ -168,6 +172,17 @@ static ArmLIR *loadConstantValue(CompilationUnit *cUnit, int rDest, int value) loadPcRel->generic.target = (LIR *) dataTarget; loadPcRel->operands[0] = rDest; setupResourceMasks(loadPcRel); + /* + * Special case for literal loads with a link register target. + * Self-cosim mode will insert calls prior to heap references + * after optimization, and those will destroy r14. The easy + * workaround is to treat literal loads into r14 as heap references + * to prevent them from being hoisted. Use of r14 in this manner + * is currently rare. Revisit if that changes. 
+ */ + if (rDest != rlr) + setMemRefType(loadPcRel, true, kLiteral); + loadPcRel->aliasInfo = dataTarget->operands[0]; res = loadPcRel; dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.c b/vm/compiler/codegen/arm/Thumb2/Gen.c index 8b2b69653..1a505efe2 100644 --- a/vm/compiler/codegen/arm/Thumb2/Gen.c +++ b/vm/compiler/codegen/arm/Thumb2/Gen.c @@ -322,65 +322,6 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir, branch3->generic.target = branch1->generic.target; } -static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir) -{ - RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); - RegLocation rlDest = inlinedTarget(cUnit, mir, false); - rlObj = loadValue(cUnit, rlObj, kCoreReg); - RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, NULL); - loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, - rlResult.lowReg); - storeValue(cUnit, rlDest, rlResult); - return false; -} - -static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir) -{ - int contents = offsetof(ArrayObject, contents); - RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); - RegLocation rlIdx = dvmCompilerGetSrc(cUnit, mir, 1); - RegLocation rlDest = inlinedTarget(cUnit, mir, false); - RegLocation rlResult; - rlObj = loadValue(cUnit, rlObj, kCoreReg); - rlIdx = loadValue(cUnit, rlIdx, kCoreReg); - int regMax = dvmCompilerAllocTemp(cUnit); - int regOff = dvmCompilerAllocTemp(cUnit); - int regPtr = dvmCompilerAllocTemp(cUnit); - ArmLIR *pcrLabel = genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, - mir->offset, NULL); - loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, regMax); - loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_offset, regOff); - loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_value, regPtr); - genBoundsCheck(cUnit, rlIdx.lowReg, regMax, mir->offset, pcrLabel); - 
dvmCompilerFreeTemp(cUnit, regMax); - opRegImm(cUnit, kOpAdd, regPtr, contents); - opRegReg(cUnit, kOpAdd, regOff, rlIdx.lowReg); - rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - loadBaseIndexed(cUnit, regPtr, regOff, rlResult.lowReg, 1, kUnsignedHalf); - storeValue(cUnit, rlDest, rlResult); - return false; -} - -static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir) -{ - RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); - rlSrc = loadValue(cUnit, rlSrc, kCoreReg); - RegLocation rlDest = inlinedTarget(cUnit, mir, false);; - RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - int signReg = dvmCompilerAllocTemp(cUnit); - /* - * abs(x) = y<=x>>31, (x+y)^y. - * Thumb2's IT block also yields 3 instructions, but imposes - * scheduling constraints. - */ - opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.lowReg, 31); - opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); - opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); - storeValue(cUnit, rlDest, rlResult); - return false; -} - static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir) { RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); @@ -421,28 +362,6 @@ static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin) return false; } -static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir) -{ - RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); - RegLocation rlDest = inlinedTargetWide(cUnit, mir, false); - rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg); - RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - int signReg = dvmCompilerAllocTemp(cUnit); - /* - * abs(x) = y<=x>>31, (x+y)^y. - * Thumb2 IT block allows slightly shorter sequence, - * but introduces a scheduling barrier. Stick with this - * mechanism for now. 
- */ - opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.highReg, 31); - opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); - opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc.highReg, signReg); - opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); - opRegReg(cUnit, kOpXor, rlResult.highReg, signReg); - storeValueWide(cUnit, rlDest, rlResult); - return false; -} - static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, RegLocation rlSrc, RegLocation rlResult, int lit, int firstBit, int secondBit) |