author     buzbee <buzbee@google.com>       2012-11-30 06:46:45 -0800
committer  Elliott Hughes <enh@google.com>  2012-11-30 14:48:08 -0800
commit     c83353bc0d7ddfedd3bc06fdb12f25710191e1e6 (patch)
tree       13e0b60b68090a0355bdd4fd486a8462d8bb7f95 /vm/compiler/codegen/arm
parent     c375ac332911cfea8e6570e2681cf79ad5220edb (diff)
JIT: Performance fix for const doubles
Some recent ARM processors take a performance hit when a floating-point
double is created by loading it as a pair of singles. The legacy code that
supported soft floating-point doubles as a pair of core registers loaded
double immediates in this way. With this CL, double immediates are handled
as a single unit.

(cherry-pick of c8129911e598ad0ca8d7b31012444ab6ce8bce45)

Change-Id: Ic1512e34bfd233a6f5ffd58ce843965adbbad875
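For illustration only (this sketch is not part of the commit), the change in
emitted code for a double constant that has no 8-bit VFP immediate encoding
looks roughly like this; the registers and pool offsets are invented:

    // Before: the constant reached the VFP register file as two singles.
    //     vldr  s0, [pc, #off]      @ low word of the double
    //     vldr  s1, [pc, #off+4]    @ high word of the double
    // After: one 64-bit literal load (kThumb2Vldrd), the whole double at once.
    //     vldr  d0, [pc, #off]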
Diffstat (limited to 'vm/compiler/codegen/arm')
-rw-r--r--  vm/compiler/codegen/arm/Assemble.cpp         3
-rw-r--r--  vm/compiler/codegen/arm/CodegenCommon.cpp   27
-rw-r--r--  vm/compiler/codegen/arm/Thumb2/Factory.cpp  40
3 files changed, 65 insertions, 5 deletions
diff --git a/vm/compiler/codegen/arm/Assemble.cpp b/vm/compiler/codegen/arm/Assemble.cpp
index d1ecd9756..7406d3e88 100644
--- a/vm/compiler/codegen/arm/Assemble.cpp
+++ b/vm/compiler/codegen/arm/Assemble.cpp
@@ -952,6 +952,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit,
if (lir->opcode == kThumbLdrPcRel ||
lir->opcode == kThumb2LdrPcRel12 ||
lir->opcode == kThumbAddPcRel ||
+ ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
ArmLIR *lirTarget = (ArmLIR *) lir->generic.target;
intptr_t pc = (lir->generic.offset + 4) & ~3;
@@ -976,7 +977,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit,
}
return kRetryHalve;
}
- if (lir->opcode == kThumb2Vldrs) {
+ if ((lir->opcode == kThumb2Vldrs) || (lir->opcode == kThumb2Vldrd)) {
lir->operands[2] = delta >> 2;
} else {
lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?
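
As a side note on the fixup above: Thumb-2 VLDR (literal) encodes its offset
as a word-scaled 8-bit immediate relative to Align(PC, 4), where PC reads as
the instruction address + 4. A minimal standalone sketch of that computation
(the function name is mine, and the kRetryHalve path above is ignored):

    #include <cassert>

    // Hedged sketch of the literal-load fixup: turn byte offsets into the
    // word-scaled imm8 that ends up in operands[2] above.
    static int vldrLiteralImm8(int insnOffset, int literalOffset) {
        int pc = (insnOffset + 4) & ~3;    // Align(PC, 4), as in the assembler
        int delta = literalOffset - pc;
        assert(delta >= 0 && (delta & 3) == 0 && delta <= 1020);  // 255 * 4 max
        return delta >> 2;                 // words, not bytes
    }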
diff --git a/vm/compiler/codegen/arm/CodegenCommon.cpp b/vm/compiler/codegen/arm/CodegenCommon.cpp
index 07f3ac765..5c02678fe 100644
--- a/vm/compiler/codegen/arm/CodegenCommon.cpp
+++ b/vm/compiler/codegen/arm/CodegenCommon.cpp
@@ -368,6 +368,25 @@ static ArmLIR *scanLiteralPool(LIR *dataTarget, int value, unsigned int delta)
return NULL;
}
+/* Search the existing constants in the literal pool for an exact wide match */
+ArmLIR* scanLiteralPoolWide(LIR* dataTarget, int valLo, int valHi)
+{
+ bool lowMatch = false;
+ ArmLIR* lowTarget = NULL;
+ while (dataTarget) {
+ if (lowMatch && (((ArmLIR *)dataTarget)->operands[0] == valHi)) {
+ return lowTarget;
+ }
+ lowMatch = false;
+ if (((ArmLIR *) dataTarget)->operands[0] == valLo) {
+ lowMatch = true;
+ lowTarget = (ArmLIR *) dataTarget;
+ }
+ dataTarget = dataTarget->next;
+ }
+ return NULL;
+}
+
/*
* The following are building blocks to insert constants into the pool or
* instruction streams.
@@ -392,6 +411,14 @@ static ArmLIR *addWordData(CompilationUnit *cUnit, LIR **constantListP,
return NULL;
}
+/* Add a 64-bit constant to the literal pool or mixed with code */
+ArmLIR* addWideData(CompilationUnit* cUnit, LIR** constantListP,
+ int valLo, int valHi)
+{
+ addWordData(cUnit, constantListP, valHi);
+ return addWordData(cUnit, constantListP, valLo);
+}
+
static RegLocation inlinedTargetWide(CompilationUnit *cUnit, MIR *mir,
bool fpHint)
{
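
Aside, not from the patch: a minimal standalone snippet showing the
(valLo, valHi) split that scanLiteralPoolWide() and addWideData() operate on.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        // A double travels through the JIT as two 32-bit words:
        // valLo = low word of the IEEE-754 bit pattern, valHi = high word.
        double d = 3.141592653589793;
        uint64_t bits;
        std::memcpy(&bits, &d, sizeof bits);
        int valLo = (int)(uint32_t)bits;
        int valHi = (int)(bits >> 32);
        std::printf("valLo=0x%08x valHi=0x%08x\n", valLo, valHi);
        return 0;
    }

Note that addWideData() adds valHi first: assuming addWordData() prepends new
entries at the head of the literal list (as it does elsewhere in this file),
the pool ends up with the low word immediately before the high word, which is
the little-endian layout the 64-bit load expects, and the returned node (the
low word) is the address the load targets.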
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.cpp b/vm/compiler/codegen/arm/Thumb2/Factory.cpp
index 9c9ce1327..c3c37128c 100644
--- a/vm/compiler/codegen/arm/Thumb2/Factory.cpp
+++ b/vm/compiler/codegen/arm/Thumb2/Factory.cpp
@@ -53,7 +53,14 @@ static ArmLIR *loadFPConstantValue(CompilationUnit *cUnit, int rDest,
{
int encodedImm = encodeImmSingle(value);
assert(SINGLEREG(rDest));
- if (encodedImm >= 0) {
+ if (value == 0) {
+ // TODO: we need better info about the target CPU. A vector exclusive-or
+ // would probably be better here if we could rely on its existence.
+ // Load an immediate +2.0 (which encodes to 0)
+ newLIR2(cUnit, kThumb2Vmovs_IMM8, rDest, 0);
+ // +0.0 = +2.0 - +2.0
+ return newLIR3(cUnit, kThumb2Vsubs, rDest, rDest, rDest);
+ } else if (encodedImm >= 0) {
return newLIR2(cUnit, kThumb2Vmovs_IMM8, rDest, encodedImm);
}
ArmLIR *dataTarget = scanLiteralPool(cUnit->literalList, value, 0);
@@ -696,9 +703,34 @@ static ArmLIR *loadConstantValueWide(CompilationUnit *cUnit, int rDestLo,
{
int encodedImm = encodeImmDouble(valLo, valHi);
ArmLIR *res;
- if (FPREG(rDestLo) && (encodedImm >= 0)) {
- res = newLIR2(cUnit, kThumb2Vmovd_IMM8, S2D(rDestLo, rDestHi),
- encodedImm);
+ int targetReg = S2D(rDestLo, rDestHi);
+ if (FPREG(rDestLo)) {
+ if ((valLo == 0) && (valHi == 0)) {
+ // TODO: we need better info about the target CPU. A vector
+ // exclusive-or would probably be better here if we could rely on
+ // its existence.
+ // Load an immediate +2.0 (which encodes to 0)
+ newLIR2(cUnit, kThumb2Vmovd_IMM8, targetReg, 0);
+ // +0.0 = +2.0 - +2.0
+ res = newLIR3(cUnit, kThumb2Vsubd, targetReg, targetReg, targetReg);
+ } else if (encodedImm >= 0) {
+ res = newLIR2(cUnit, kThumb2Vmovd_IMM8, targetReg, encodedImm);
+ } else {
+ ArmLIR* dataTarget = scanLiteralPoolWide(cUnit->literalList, valLo, valHi);
+ if (dataTarget == NULL) {
+ dataTarget = addWideData(cUnit, &cUnit->literalList, valLo, valHi);
+ }
+ ArmLIR *loadPcRel = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true);
+ loadPcRel->opcode = kThumb2Vldrd;
+ loadPcRel->generic.target = (LIR *) dataTarget;
+ loadPcRel->operands[0] = targetReg;
+ loadPcRel->operands[1] = r15pc;
+ setupResourceMasks(loadPcRel);
+ setMemRefType(loadPcRel, true, kLiteral);
+ loadPcRel->aliasInfo = dataTarget->operands[0];
+ dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel);
+ res = loadPcRel;
+ }
} else {
res = loadConstantNoClobber(cUnit, rDestLo, valLo);
loadConstantNoClobber(cUnit, rDestHi, valHi);
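
Background on the +0.0 special case above (a sketch, not the actual
encodeImmDouble(), whose details may differ): VFP's VMOV immediate encodes
only values of the form +/-(16..31)/16 * 2^r for r in -3..4, and imm8 == 0
decodes to +2.0 rather than zero, which is why the code loads +2.0 and
subtracts it from itself.

    #include <cstdint>

    // Hedged sketch of a VFP modified-immediate test for doubles.
    // Encodable bit pattern: a NOT(b) bbbbbbbb cdefgh, then 48 zero bits,
    // i.e. sign a, exponent NOT(b):b*8:cd, top-of-mantissa bits efgh.
    static int encodeImmDoubleSketch(uint32_t valLo, uint32_t valHi) {
        if (valLo != 0 || (valHi & 0xffff) != 0)
            return -1;                          // low 48 mantissa bits must be 0
        uint32_t a     = (valHi >> 31) & 1;     // sign
        uint32_t notB  = (valHi >> 30) & 1;     // must be NOT(b)
        uint32_t b     = (valHi >> 29) & 1;     // replicated exponent bit
        uint32_t smear = (valHi >> 22) & 0xff;  // exponent bits 29..22, all == b
        if (notB == b || smear != (b ? 0xffu : 0x00u))
            return -1;
        uint32_t cdefgh = (valHi >> 16) & 0x3f; // low six bits of imm8
        return (int)((a << 7) | (b << 6) | cdefgh);
    }

    // encodeImmDoubleSketch(0, 0) returns -1: +0.0 is not encodable, while
    // imm8 == 0 (all fields zero) decodes to +2.0 -- hence the
    // vmov #2.0 / vsub sequence in loadConstantValueWide() above.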