| author | buzbee <buzbee@google.com> | 2012-11-30 06:46:45 -0800 |
|---|---|---|
| committer | Elliott Hughes <enh@google.com> | 2012-11-30 14:48:08 -0800 |
| commit | c83353bc0d7ddfedd3bc06fdb12f25710191e1e6 (patch) | |
| tree | 13e0b60b68090a0355bdd4fd486a8462d8bb7f95 /vm/compiler/codegen/arm | |
| parent | c375ac332911cfea8e6570e2681cf79ad5220edb (diff) | |
JIT: Performance fix for const doubles
Some recent ARM processors take a performance hit when a floating-point
double is created by loading it as a pair of singles. The legacy code that
supports soft floating-point doubles as a pair of core registers loaded
double immediates in this way. With this CL, double immediates are handled
as a single 64-bit unit.
(cherry-pick of c8129911e598ad0ca8d7b31012444ab6ce8bce45.)
Change-Id: Ic1512e34bfd233a6f5ffd58ce843965adbbad875
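For orientation (editorial note, not part of the commit message): the JIT tracks a double constant as the pair of 32-bit words it occupies, and the legacy path materialized each word with its own PC-relative vldr.32 into the two S registers that alias the destination D register. The fix below instead emits one kThumb2Vldrd so the constant is loaded as a single 64-bit access. A minimal sketch of the word split, assuming little-endian ARM; splitDouble is a hypothetical helper, not a Dalvik function:

```cpp
#include <cstdint>
#include <cstring>

// How a double constant maps onto the (valLo, valHi) word pair that the
// patch passes around.  Hypothetical helper for illustration only.
void splitDouble(double value, int32_t* valLo, int32_t* valHi)
{
    int64_t bits;
    std::memcpy(&bits, &value, sizeof bits);  // raw IEEE-754 bit pattern
    *valLo = (int32_t) bits;                  // low word:  frac<31:0>
    *valHi = (int32_t)(bits >> 32);           // high word: sign, exp, frac<51:32>
}
```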
Diffstat (limited to 'vm/compiler/codegen/arm')
| -rw-r--r-- | vm/compiler/codegen/arm/Assemble.cpp | 3 |
|---|---|---|
| -rw-r--r-- | vm/compiler/codegen/arm/CodegenCommon.cpp | 27 |
| -rw-r--r-- | vm/compiler/codegen/arm/Thumb2/Factory.cpp | 40 |
3 files changed, 65 insertions, 5 deletions
```diff
diff --git a/vm/compiler/codegen/arm/Assemble.cpp b/vm/compiler/codegen/arm/Assemble.cpp
index d1ecd9756..7406d3e88 100644
--- a/vm/compiler/codegen/arm/Assemble.cpp
+++ b/vm/compiler/codegen/arm/Assemble.cpp
@@ -952,6 +952,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit,
         if (lir->opcode == kThumbLdrPcRel ||
             lir->opcode == kThumb2LdrPcRel12 ||
             lir->opcode == kThumbAddPcRel ||
+            ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
             ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
             ArmLIR *lirTarget = (ArmLIR *) lir->generic.target;
             intptr_t pc = (lir->generic.offset + 4) & ~3;
@@ -976,7 +977,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit,
                 }
                 return kRetryHalve;
             }
-            if (lir->opcode == kThumb2Vldrs) {
+            if ((lir->opcode == kThumb2Vldrs) || (lir->opcode == kThumb2Vldrd)) {
                 lir->operands[2] = delta >> 2;
             } else {
                 lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?
diff --git a/vm/compiler/codegen/arm/CodegenCommon.cpp b/vm/compiler/codegen/arm/CodegenCommon.cpp
index 07f3ac765..5c02678fe 100644
--- a/vm/compiler/codegen/arm/CodegenCommon.cpp
+++ b/vm/compiler/codegen/arm/CodegenCommon.cpp
@@ -368,6 +368,25 @@ static ArmLIR *scanLiteralPool(LIR *dataTarget, int value, unsigned int delta)
     return NULL;
 }
 
+/* Search the existing constants in the literal pool for an exact wide match */
+ArmLIR* scanLiteralPoolWide(LIR* dataTarget, int valLo, int valHi)
+{
+    bool lowMatch = false;
+    ArmLIR* lowTarget = NULL;
+    while (dataTarget) {
+        if (lowMatch && (((ArmLIR *) dataTarget)->operands[0] == valHi)) {
+            return lowTarget;
+        }
+        lowMatch = false;
+        if (((ArmLIR *) dataTarget)->operands[0] == valLo) {
+            lowMatch = true;
+            lowTarget = (ArmLIR *) dataTarget;
+        }
+        dataTarget = dataTarget->next;
+    }
+    return NULL;
+}
+
 /*
  * The following are building blocks to insert constants into the pool or
  * instruction streams.
@@ -392,6 +411,14 @@ static ArmLIR *addWordData(CompilationUnit *cUnit, LIR **constantListP,
     return NULL;
 }
 
+/* Add a 64-bit constant to the literal pool or mixed with code */
+ArmLIR* addWideData(CompilationUnit* cUnit, LIR** constantListP,
+                    int valLo, int valHi)
+{
+    addWordData(cUnit, constantListP, valHi);
+    return addWordData(cUnit, constantListP, valLo);
+}
+
 static RegLocation inlinedTargetWide(CompilationUnit *cUnit, MIR *mir,
                                      bool fpHint)
 {
```
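An editorial note on ordering, which the patch relies on but does not spell out: addWordData pushes each entry onto the head of the literal list, so addWideData's hi-then-lo insertion leaves the pair in lo-followed-by-hi list order. That is exactly the adjacency scanLiteralPoolWide's two-step walk detects, and (on little-endian ARM) the word order a single vldr.64 expects in memory. A toy model of the invariant, with stand-in types rather than Dalvik's LIR:

```cpp
// Stand-ins for Dalvik's literal-pool list; illustration only.
struct Lit { int word; Lit* next; };

// Head insertion, as addWordData does: the value pushed last comes first.
Lit* push(Lit** head, int word)
{
    Lit* n = new Lit{word, *head};
    *head = n;
    return n;
}

// Equivalent to scanLiteralPoolWide's lowMatch walk: a wide match is a
// node holding the low word whose immediate successor holds the high word.
Lit* scanWide(Lit* p, int valLo, int valHi)
{
    for (; p && p->next; p = p->next)
        if (p->word == valLo && p->next->word == valHi)
            return p;
    return nullptr;
}

// Mirroring addWideData: push(&pool, valHi); push(&pool, valLo);
// afterwards scanWide(pool, valLo, valHi) finds the pair.
```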
```diff
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.cpp b/vm/compiler/codegen/arm/Thumb2/Factory.cpp
index 9c9ce1327..c3c37128c 100644
--- a/vm/compiler/codegen/arm/Thumb2/Factory.cpp
+++ b/vm/compiler/codegen/arm/Thumb2/Factory.cpp
@@ -53,7 +53,14 @@ static ArmLIR *loadFPConstantValue(CompilationUnit *cUnit, int rDest,
 {
     int encodedImm = encodeImmSingle(value);
     assert(SINGLEREG(rDest));
-    if (encodedImm >= 0) {
+    if (value == 0) {
+        // TODO: we need better info about the target CPU.  A vector exclusive
+        // or would probably be better here if we could rely on its existence.
+        // Load an immediate +2.0 (which encodes to 0)
+        newLIR2(cUnit, kThumb2Vmovs_IMM8, rDest, 0);
+        // +0.0 = +2.0 - +2.0
+        return newLIR3(cUnit, kThumb2Vsubs, rDest, rDest, rDest);
+    } else if (encodedImm >= 0) {
         return newLIR2(cUnit, kThumb2Vmovs_IMM8, rDest, encodedImm);
     }
     ArmLIR *dataTarget = scanLiteralPool(cUnit->literalList, value, 0);
@@ -696,9 +703,34 @@ static ArmLIR *loadConstantValueWide(CompilationUnit *cUnit, int rDestLo,
 {
     int encodedImm = encodeImmDouble(valLo, valHi);
     ArmLIR *res;
-    if (FPREG(rDestLo) && (encodedImm >= 0)) {
-        res = newLIR2(cUnit, kThumb2Vmovd_IMM8, S2D(rDestLo, rDestHi),
-                      encodedImm);
+    int targetReg = S2D(rDestLo, rDestHi);
+    if (FPREG(rDestLo)) {
+        if ((valLo == 0) && (valHi == 0)) {
+            // TODO: we need better info about the target CPU.  A vector
+            // exclusive or would probably be better here if we could rely on
+            // its existence.
+            // Load an immediate +2.0 (which encodes to 0)
+            newLIR2(cUnit, kThumb2Vmovd_IMM8, targetReg, 0);
+            // +0.0 = +2.0 - +2.0
+            res = newLIR3(cUnit, kThumb2Vsubd, targetReg, targetReg, targetReg);
+        } else if (encodedImm >= 0) {
+            res = newLIR2(cUnit, kThumb2Vmovd_IMM8, targetReg, encodedImm);
+        } else {
+            ArmLIR* dataTarget = scanLiteralPoolWide(cUnit->literalList, valLo, valHi);
+            if (dataTarget == NULL) {
+                dataTarget = addWideData(cUnit, &cUnit->literalList, valLo, valHi);
+            }
+            ArmLIR *loadPcRel = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true);
+            loadPcRel->opcode = kThumb2Vldrd;
+            loadPcRel->generic.target = (LIR *) dataTarget;
+            loadPcRel->operands[0] = targetReg;
+            loadPcRel->operands[1] = r15pc;
+            setupResourceMasks(loadPcRel);
+            setMemRefType(loadPcRel, true, kLiteral);
+            loadPcRel->aliasInfo = dataTarget->operands[0];
+            dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel);
+            res = loadPcRel;
+        }
     } else {
         res = loadConstantNoClobber(cUnit, rDestLo, valLo);
         loadConstantNoClobber(cUnit, rDestHi, valHi);
```
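The fast path in loadConstantValueWide hinges on encodeImmDouble, which lies outside this patch's hunks. Under the ARMv7 VFPExpandImm(imm8, 64) rules, a check along the following lines decides whether a double fits the 8-bit vmov.f64 immediate form; this is a sketch for orientation, not Dalvik's actual implementation:

```cpp
#include <cstdint>

// Return the imm8 encoding (0..255) if the double whose raw words are
// {valHi:valLo} is representable as a vmov.f64 modified immediate, else -1.
// VFPExpandImm(imm8, 64): for imm8 = abcdefgh, sign = a,
// exp = NOT(b):bbbbbbbb:cd, frac = efgh followed by 48 zero bits.
int encodeImmDoubleSketch(int32_t valLo, int32_t valHi)
{
    uint32_t hi = (uint32_t) valHi;       // avoid signed-shift pitfalls
    if (valLo != 0 || (hi & 0x0000ffffu) != 0)
        return -1;                        // frac<47:0> must be all zero
    int bitA  = (hi >> 31) & 1;           // sign bit a
    int notB  = (hi >> 30) & 1;           // exp<10>, must equal NOT(b)
    int smear = (hi >> 22) & 0xff;        // exp<9:2>, must be b repeated
    int bitB  = smear & 1;
    if (notB == bitB || smear != (bitB ? 0xff : 0x00))
        return -1;                        // exponent outside the imm8 range
    int slice = (hi >> 16) & 0x3f;        // cd (exp<1:0>) and efgh (frac<51:48>)
    return (bitA << 7) | (bitB << 6) | slice;
}
```

Two consequences are visible in the hunks above: imm8 == 0 expands to +2.0 (sign 0, exponent 0x400, zero fraction), which is why the zero path loads +2.0 and subtracts it from itself to get +0.0; and +0.0 itself fails the exponent check, so it can never take the vmovd fast path.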