author      Steve Kondik <shade@chemlab.org>    2013-06-01 20:54:46 -0700
committer   Steve Kondik <shade@chemlab.org>    2013-06-01 21:40:39 -0700
commit      ef13d33fd29c68b4e249c58c19e08fd0bcc60acb (patch)
tree        9ae5734a2e9a3017549b6b2e70923bd24ff2e538
parent      35267d8ed32dfd6572ee1e31cde0ca7c98ed31c7 (diff)
Revert all STE Dalvik performance patches
* At least one of these patches is problematic and is causing math errors
  for many users.
* Let's revert for now, and continue to assist with debugging.

Revert "JIT: Use vmla.f64 instead of vmuld + vaddd."
This reverts commit 35267d8ed32dfd6572ee1e31cde0ca7c98ed31c7.

Revert "JIT: Combine add with shift and offset for array load & store."
This reverts commit 5c9e6726c29366b85dc4329809b2c48fe21f8ace.

Revert "JIT: Combine shift & sub for monitor unlock."
This reverts commit 38c38d95a58c590d753b56d059d71517af561fd1.

Revert "JIT: Use rsb and shift in easy multiply."
This reverts commit 76ed9c95be6e75ede85fc170cbd260ed97702f99.

Revert "JIT: Combine add with shift."
This reverts commit 102ab06911d86edcfb81c0343c20332313e4308b.

Revert "JIT: Allow use of cbz/cbnz for conditional branch gen."
This reverts commit d46b37eac79319da2c8bbdad1b6915aaba3e9cf5.

Revert "JIT: Suppress loop suspend checks."
This reverts commit 0723288f790e18669621b4f75803bbc4ff8e6bd5.

Revert "JIT: Hoist loop load operations above loop."
This reverts commit 8ef34ac433180181a9da57ac666a5ae171f0c682.

Change-Id: Ifae34549ada90d6238a5839538eda8cb4a51ec33
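For context, the instruction-combining peepholes named above rewrote adjacent LIR pairs into single ARM instructions. A minimal sketch, reproduced from the comments in the optimization passes removed further down in this diff (register numbers and the shift amount #n are illustrative only):

    lsl      r9, r1, #n
    add      r0, r2, r9         =>   add      r0, r2, r1, lsl #n

    vmuld    d9, d9, d10
    vaddd    d8, d8, d9         =>   vmla.f64 d8, d9, d10

Reverting these passes restores the separate two-instruction sequences while the reported math errors are narrowed down.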
-rw-r--r--   vm/compiler/Loop.cpp                                 4
-rw-r--r--   vm/compiler/Loop.h                                   8
-rw-r--r--   vm/compiler/codegen/Optimizer.h                      2
-rw-r--r--   vm/compiler/codegen/arm/ArmLIR.h                     4
-rw-r--r--   vm/compiler/codegen/arm/Assemble.cpp                 9
-rw-r--r--   vm/compiler/codegen/arm/CodegenDriver.cpp          169
-rw-r--r--   vm/compiler/codegen/arm/GlobalOptimizations.cpp    208
-rw-r--r--   vm/compiler/codegen/arm/LocalOptimizations.cpp     176
-rw-r--r--   vm/compiler/codegen/arm/Thumb/Gen.cpp               149
-rw-r--r--   vm/compiler/codegen/arm/Thumb2/Factory.cpp           10
-rw-r--r--   vm/compiler/codegen/arm/Thumb2/Gen.cpp              153
11 files changed, 160 insertions(+), 732 deletions(-)
diff --git a/vm/compiler/Loop.cpp b/vm/compiler/Loop.cpp
index e4b70d19e..f82668628 100644
--- a/vm/compiler/Loop.cpp
+++ b/vm/compiler/Loop.cpp
@@ -699,10 +699,6 @@ bool dvmCompilerLoopOpt(CompilationUnit *cUnit)
false /* isIterative */);
DEBUG_LOOP(dumpIVList(cUnit);)
- /* Check if we can suppress suspend checks */
- if (cUnit->numInsts <= LOOP_SUPPRESS_SUSPEND_THRESHOLD)
- loopAnalysis->suppressSuspend = true;
-
/* Only optimize array accesses for simple counted loop for now */
if (!isSimpleCountedLoop(cUnit))
return false;
diff --git a/vm/compiler/Loop.h b/vm/compiler/Loop.h
index d451476b8..803209328 100644
--- a/vm/compiler/Loop.h
+++ b/vm/compiler/Loop.h
@@ -20,12 +20,6 @@
#include "Dalvik.h"
#include "CompilerInternals.h"
-/*
- * The minimum number of instructions required in a loop,
- * to trigger generation of suspend check code.
- */
-#define LOOP_SUPPRESS_SUSPEND_THRESHOLD 10
-
typedef struct LoopAnalysis {
BitVector *isIndVarV; // length == numSSAReg
GrowableList *ivList; // induction variables
@@ -38,8 +32,6 @@ typedef struct LoopAnalysis {
LIR *branchToBody; // branch over to the body from entry
LIR *branchToPCR; // branch over to the PCR cell
bool bodyIsClean; // loop body cannot throw any exceptions
- bool branchesAdded; // Body and PCR branch added to LIR output
- bool suppressSuspend; // loop body should suppress suspend check
} LoopAnalysis;
bool dvmCompilerFilterLoopBlocks(CompilationUnit *cUnit);
diff --git a/vm/compiler/codegen/Optimizer.h b/vm/compiler/codegen/Optimizer.h
index af665aaab..43d98ed38 100644
--- a/vm/compiler/codegen/Optimizer.h
+++ b/vm/compiler/codegen/Optimizer.h
@@ -30,8 +30,6 @@ enum optControlVector {
kSuppressLoads,
kMethodInlining,
kMethodJit,
- kShiftArithmetic,
- kMultiplyArithmetic,
};
/* Forward declarations */
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index cacaa7eac..cbd4c70d9 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -627,10 +627,6 @@ typedef enum ArmOpcode {
kThumb2Dmb, /* dmb [1111001110111111100011110101] option[3-0] */
kThumb2LdrPcReln12, /* ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12]
imm12[11-0] */
- kThumb2RsbRRR, /* rsb [111010111101] rn[19..16] [0000] rd[11..8]
- [0000] rm[3..0] */
- kThumb2Vmlad, /* vmla.f64 vd, vn, vm [111011100000] rn[19..16]
- rd[15-12] [10110000] rm[3..0] */
kThumbUndefined, /* undefined [11011110xxxxxxxx] */
kArmLast,
} ArmOpcode;
diff --git a/vm/compiler/codegen/arm/Assemble.cpp b/vm/compiler/codegen/arm/Assemble.cpp
index 1cc285c59..7406d3e88 100644
--- a/vm/compiler/codegen/arm/Assemble.cpp
+++ b/vm/compiler/codegen/arm/Assemble.cpp
@@ -881,15 +881,6 @@ ArmEncodingMap EncodingMap[kArmLast] = {
kFmtUnused, -1, -1,
IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
"ldr", "r!0d, [r15pc, -#!1d]", 2),
- ENCODING_MAP(kThumb2RsbRRR, 0xebd00000, /* setflags encoding */
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1,
- IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
- "rsb", "r!0d, r!1d, r!2d!3H", 2),
- ENCODING_MAP(kThumb2Vmlad, 0xee000b00,
- kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE0 | REG_USE1 | REG_USE2,
- "vmla.f64", "!0S, !1S, !2S", 2),
ENCODING_MAP(kThumbUndefined, 0xde00,
kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, NO_OPERAND,
diff --git a/vm/compiler/codegen/arm/CodegenDriver.cpp b/vm/compiler/codegen/arm/CodegenDriver.cpp
index d512c573a..480d6f96d 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.cpp
+++ b/vm/compiler/codegen/arm/CodegenDriver.cpp
@@ -398,6 +398,149 @@ static void genIPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
}
}
+
+/*
+ * Generate array load
+ */
+static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size,
+ RegLocation rlArray, RegLocation rlIndex,
+ RegLocation rlDest, int scale)
+{
+ RegisterClass regClass = dvmCompilerRegClassBySize(size);
+ int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
+ int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
+ RegLocation rlResult;
+ rlArray = loadValue(cUnit, rlArray, kCoreReg);
+ rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
+ int regPtr;
+
+ /* null object? */
+ ArmLIR * pcrLabel = NULL;
+
+ if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+ pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
+ rlArray.lowReg, mir->offset, NULL);
+ }
+
+ regPtr = dvmCompilerAllocTemp(cUnit);
+
+ if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ int regLen = dvmCompilerAllocTemp(cUnit);
+ /* Get len */
+ loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
+ /* regPtr -> array data */
+ opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
+ genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
+ pcrLabel);
+ dvmCompilerFreeTemp(cUnit, regLen);
+ } else {
+ /* regPtr -> array data */
+ opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
+ }
+ if ((size == kLong) || (size == kDouble)) {
+ if (scale) {
+ int rNewIndex = dvmCompilerAllocTemp(cUnit);
+ opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
+ opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
+ dvmCompilerFreeTemp(cUnit, rNewIndex);
+ } else {
+ opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
+ }
+ rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
+
+ HEAP_ACCESS_SHADOW(true);
+ loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
+ HEAP_ACCESS_SHADOW(false);
+
+ dvmCompilerFreeTemp(cUnit, regPtr);
+ storeValueWide(cUnit, rlDest, rlResult);
+ } else {
+ rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
+
+ HEAP_ACCESS_SHADOW(true);
+ loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
+ scale, size);
+ HEAP_ACCESS_SHADOW(false);
+
+ dvmCompilerFreeTemp(cUnit, regPtr);
+ storeValue(cUnit, rlDest, rlResult);
+ }
+}
+
+/*
+ * Generate array store
+ *
+ */
+static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
+ RegLocation rlArray, RegLocation rlIndex,
+ RegLocation rlSrc, int scale)
+{
+ RegisterClass regClass = dvmCompilerRegClassBySize(size);
+ int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
+ int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
+
+ int regPtr;
+ rlArray = loadValue(cUnit, rlArray, kCoreReg);
+ rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
+
+ if (dvmCompilerIsTemp(cUnit, rlArray.lowReg)) {
+ dvmCompilerClobber(cUnit, rlArray.lowReg);
+ regPtr = rlArray.lowReg;
+ } else {
+ regPtr = dvmCompilerAllocTemp(cUnit);
+ genRegCopy(cUnit, regPtr, rlArray.lowReg);
+ }
+
+ /* null object? */
+ ArmLIR * pcrLabel = NULL;
+
+ if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+ pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
+ mir->offset, NULL);
+ }
+
+ if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ int regLen = dvmCompilerAllocTemp(cUnit);
+ //NOTE: max live temps(4) here.
+ /* Get len */
+ loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
+ /* regPtr -> array data */
+ opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+ genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
+ pcrLabel);
+ dvmCompilerFreeTemp(cUnit, regLen);
+ } else {
+ /* regPtr -> array data */
+ opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+ }
+ /* at this point, regPtr points to array, 2 live temps */
+ if ((size == kLong) || (size == kDouble)) {
+ //TODO: need specific wide routine that can handle fp regs
+ if (scale) {
+ int rNewIndex = dvmCompilerAllocTemp(cUnit);
+ opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
+ opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
+ dvmCompilerFreeTemp(cUnit, rNewIndex);
+ } else {
+ opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
+ }
+ rlSrc = loadValueWide(cUnit, rlSrc, regClass);
+
+ HEAP_ACCESS_SHADOW(true);
+ storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
+ HEAP_ACCESS_SHADOW(false);
+
+ dvmCompilerFreeTemp(cUnit, regPtr);
+ } else {
+ rlSrc = loadValue(cUnit, rlSrc, regClass);
+
+ HEAP_ACCESS_SHADOW(true);
+ storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
+ scale, size);
+ HEAP_ACCESS_SHADOW(false);
+ }
+}
+
/*
* Generate array object store
* Must use explicit register allocation here because of
@@ -1318,8 +1461,8 @@ static bool handleFmt10t_Fmt20t_Fmt30t(CompilationUnit *cUnit, MIR *mir,
/* backward branch? */
bool backwardBranch = (bb->taken->startOffset <= mir->offset);
- if (backwardBranch && (gDvmJit.genSuspendPoll ||
- (cUnit->jitMode == kJitLoop && !cUnit->loopAnalysis->suppressSuspend))) {
+ if (backwardBranch &&
+ (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) {
genSuspendPoll(cUnit, mir);
}
@@ -1962,14 +2105,17 @@ static bool handleFmt21t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb,
/* backward branch? */
bool backwardBranch = (bb->taken->startOffset <= mir->offset);
- if (backwardBranch && (gDvmJit.genSuspendPoll ||
- (cUnit->jitMode == kJitLoop && !cUnit->loopAnalysis->suppressSuspend))) {
+ if (backwardBranch &&
+ (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) {
genSuspendPoll(cUnit, mir);
}
RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+ opRegImm(cUnit, kOpCmp, rlSrc.lowReg, 0);
+
+//TUNING: break this out to allow use of Thumb2 CB[N]Z
switch (dalvikOpcode) {
case OP_IF_EQZ:
cond = kArmCondEq;
@@ -1994,8 +2140,7 @@ static bool handleFmt21t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb,
ALOGE("Unexpected opcode (%d) for Fmt21t", dalvikOpcode);
dvmCompilerAbort(cUnit);
}
- ArmLIR* branch = genCmpImmBranch(cUnit, cond, rlSrc.lowReg, 0);
- branch->generic.target = (LIR*)&labelList[bb->taken->id];
+ genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]);
/* This mostly likely will be optimized away in a later phase */
genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]);
return false;
@@ -2118,7 +2263,10 @@ static bool handleEasyMultiply(CompilationUnit *cUnit,
} else {
// Reverse subtract: (src << (shift + 1)) - src.
assert(powerOfTwoMinusOne);
- genMultiplyByShiftAndReverseSubtract(cUnit, rlSrc, rlResult, lowestSetBit(lit + 1));
+ // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1)
+ int tReg = dvmCompilerAllocTemp(cUnit);
+ opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1));
+ opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
}
storeValue(cUnit, rlDest, rlResult);
return true;
@@ -2462,8 +2610,8 @@ static bool handleFmt22t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb,
/* backward branch? */
bool backwardBranch = (bb->taken->startOffset <= mir->offset);
- if (backwardBranch && (gDvmJit.genSuspendPoll ||
- (cUnit->jitMode == kJitLoop && !cUnit->loopAnalysis->suppressSuspend))) {
+ if (backwardBranch &&
+ (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) {
genSuspendPoll(cUnit, mir);
}
@@ -3699,7 +3847,7 @@ static void handlePCReconstruction(CompilationUnit *cUnit,
* We should never reach here through fall-through code, so insert
* a bomb to signal troubles immediately.
*/
- if ((numElems) || (cUnit->jitMode == kJitLoop)) {
+ if (numElems) {
newLIR0(cUnit, kThumbUndefined);
}
@@ -4354,7 +4502,6 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
(LIR *) cUnit->loopAnalysis->branchToBody);
dvmCompilerAppendLIR(cUnit,
(LIR *) cUnit->loopAnalysis->branchToPCR);
- cUnit->loopAnalysis->branchesAdded = true;
}
if (headLIR) {
diff --git a/vm/compiler/codegen/arm/GlobalOptimizations.cpp b/vm/compiler/codegen/arm/GlobalOptimizations.cpp
index b6c81851a..45a063105 100644
--- a/vm/compiler/codegen/arm/GlobalOptimizations.cpp
+++ b/vm/compiler/codegen/arm/GlobalOptimizations.cpp
@@ -17,16 +17,6 @@
#include "Dalvik.h"
#include "vm/compiler/CompilerInternals.h"
#include "ArmLIR.h"
-#include "vm/compiler/Loop.h"
-
-/* Max number of hoistable loop load operations. */
-#define MAX_LOAD_HOISTS 25 // Should be more than enough, considering ARM only has 16 registers.
-/* Max number of operations that access dalvik registers. */
-#define MAX_REGISTER_OPS 50
-
-/* Enable verbose prints for loop load hoisting by setting this define.
-#define LOOP_LOAD_HOIST_VERBOSE
- */
/*
* Identify unconditional branches that jump to the immediate successor of the
@@ -69,205 +59,7 @@ static void applyRedundantBranchElimination(CompilationUnit *cUnit)
}
}
-/*
- * Perform a pass to hoist all frame pointer load instructions that
- * are independent, outside the loop.
- */
-static void applyLoopLoadHoisting(CompilationUnit *cUnit)
-{
- ArmLIR *thisLIR, *labelLIR, *lastLIR, *insertLIR;
- ArmLIR *loadLIR[MAX_LOAD_HOISTS];
- ArmLIR *regLIR[MAX_REGISTER_OPS];
- u8 defLoadMask = 0;
- u8 defMask = 0;
- u8 masterDefMask = ~((1ULL << kRegEnd) - 1);
- bool isValidLoop = false;
- int loadCount = 0;
- int regCount = 0;
- int loadindex;
- int hoistCount = 0;
-
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("GlobalOpts LoopLoadHoisting applied on '%s'", cUnit->method->name);
- cUnit->printMe = true;
-#endif
-
- labelLIR = (ArmLIR *) cUnit->firstLIRInsn;
- lastLIR = (ArmLIR *) cUnit->lastLIRInsn;
- insertLIR = NULL;
-
- /* Find the insert point */
- while ((labelLIR != lastLIR) &&
- (labelLIR->flags.isNop ||
- (labelLIR->opcode != kArmPseudoNormalBlockLabel))) {
- if ((cUnit->loopAnalysis->branchesAdded) &&
- (labelLIR->opcode == kThumbBUncond) &&
- (insertLIR == NULL) && (!labelLIR->flags.isNop))
- insertLIR = labelLIR;
-
- labelLIR = NEXT_LIR(labelLIR);
- }
-
- if (labelLIR->opcode != kArmPseudoNormalBlockLabel) {
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("Can't hoist - no loop label found!");
-#endif
- return;
- }
-
- if (insertLIR == NULL) {
- insertLIR = labelLIR;
- }
- else if ((ArmLIR *) insertLIR->generic.target != labelLIR) {
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("Can't hoist - branch target does not match!");
-#endif
- return;
- }
-
- /* Search for eligible load instructions to hoist */
- for (thisLIR = labelLIR;
- thisLIR != lastLIR;
- thisLIR = NEXT_LIR(thisLIR)) {
- bool handled = false;
- int flags;
-
- /* Skip non-interesting instructions */
- if (thisLIR->flags.isNop || isPseudoOpcode(thisLIR->opcode))
- continue;
-
- flags = EncodingMap[thisLIR->opcode].flags;
-
- /* If it's a load instruction, check if it's a hoist candidate. */
- if (((flags & IS_LOAD) != 0) &&
- ((thisLIR->useMask & ENCODE_DALVIK_REG) != 0)) {
- if (regCount >= MAX_REGISTER_OPS) {
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("Out of register list space!");
-#endif
- return;
- }
- regLIR[regCount++] = thisLIR;
-
- if ((((defLoadMask | defMask) & thisLIR->defMask) == 0) &&
- (loadCount < MAX_LOAD_HOISTS)) {
- defLoadMask |= thisLIR->defMask;
- loadLIR[loadCount++] = thisLIR;
- handled = true;
- } else {
- masterDefMask |= thisLIR->defMask;
- }
- /* If it's a store instruction, check if it matches a previous
- hoistable load instruction. If so, reset the global def-flag to
- indicate that the load is still hoistable. */
- } else if (((flags & IS_STORE) != 0) &&
- ((thisLIR->defMask & ENCODE_DALVIK_REG) != 0)) {
- if (regCount >= MAX_REGISTER_OPS) {
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("Out of register list space!");
-#endif
- return;
- }
- regLIR[regCount++] = thisLIR;
-
- if ((thisLIR->useMask & defLoadMask) != 0) {
- handled = true;
- for (int i = loadCount - 1; i >= 0; i--) {
- if ((thisLIR->aliasInfo == loadLIR[i]->aliasInfo) &&
- (thisLIR->operands[0] == loadLIR[i]->operands[0])) {
- defMask &= ~(loadLIR[i]->defMask);
- break;
- }
- }
- }
- /* If it's a branch instruction, check if it's the loop branch.
- If it matches the label, mark it as a valid loop. */
- } else if ((flags & IS_BRANCH) != 0) {
- handled = true;
- if (labelLIR == (ArmLIR *) thisLIR->generic.target) {
- isValidLoop = true;
- } else if ((thisLIR->opcode >= kThumbBlx1) &&
- (thisLIR->opcode <= kThumbBlxR))
- {
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("Trace contains branch to subroutine!");
-#endif
- return;
- }
- } else if (thisLIR->opcode == kThumbUndefined) {
- break;
- }
-
- /* If it's not a 'special' instruction, accumulate into def-flags. */
- if (!handled)
- defMask |= thisLIR->defMask;
- }
-
- defLoadMask &= ~(defMask | masterDefMask);
- if (!isValidLoop || (defLoadMask == 0)) {
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("Loop not valid, or defLoadMask (0x%llx) was zero!", defLoadMask);
-#endif
- return;
- }
-
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("Masks: masterDef: 0x%llx, def: 0x%llx, final defLoad: 0x%llx",
- masterDefMask, defMask, defLoadMask);
-#endif
-
- /* Try to hoist the load operations */
- for (loadindex = 0; loadindex < loadCount; loadindex++) {
- thisLIR = loadLIR[loadindex];
-
- /* Host this load? */
- if ((thisLIR->defMask & defLoadMask) == thisLIR->defMask) {
- int i;
- bool foundAlias = false;
- for (i = 0; i < regCount; i++) {
- if ((thisLIR->aliasInfo == regLIR[i]->aliasInfo) &&
- (thisLIR->operands[0] != regLIR[i]->operands[0])) {
- foundAlias = true;
- for (int k = loadindex; k < loadCount; k++) {
- if (loadLIR[k] == regLIR[i]) {
- loadLIR[k]->defMask = -1;
- break;
- }
- }
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("Register alias found between these two load ops:");
- dvmDumpLIRInsn((LIR*)thisLIR, NULL);
- dvmDumpLIRInsn((LIR*)regLIR[i], NULL);
-#endif
- break;
- }
- }
-
- if (!foundAlias) {
-#ifdef LOOP_LOAD_HOIST_VERBOSE
- ALOGD("Hoisting this load op:");
- dvmDumpLIRInsn((LIR*)thisLIR, NULL);
-#endif
- ArmLIR *newLoadLIR = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR),
- true);
- *newLoadLIR = *thisLIR;
- dvmCompilerInsertLIRBefore((LIR *) insertLIR,
- (LIR *) newLoadLIR);
- thisLIR->flags.isNop = true;
- hoistCount++;
- }
- }
- }
-
- if (cUnit->printMe)
- ALOGD("GlobalOpt LoopLoadHoist hoisted %d load ops.", hoistCount);
-}
-
void dvmCompilerApplyGlobalOptimizations(CompilationUnit *cUnit)
{
applyRedundantBranchElimination(cUnit);
-
- if (cUnit->jitMode == kJitLoop) {
- applyLoopLoadHoisting(cUnit);
- }
}
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.cpp b/vm/compiler/codegen/arm/LocalOptimizations.cpp
index be3aa484a..8013d0059 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.cpp
+++ b/vm/compiler/codegen/arm/LocalOptimizations.cpp
@@ -453,176 +453,6 @@ static void applyLoadHoisting(CompilationUnit *cUnit,
}
}
-/*
- * Find all lsl/lsr and add that can be replaced with a
- * combined lsl/lsr + add
- */
-static void applyShiftArithmeticOpts(CompilationUnit *cUnit,
- ArmLIR *headLIR,
- ArmLIR *tailLIR) {
- ArmLIR *thisLIR = NULL;
-
- for (thisLIR = headLIR;
- thisLIR != tailLIR;
- thisLIR = NEXT_LIR(thisLIR)) {
-
- if(thisLIR->flags.isNop) {
- continue;
- }
-
- if(thisLIR->opcode == kThumb2LslRRI5 || thisLIR->opcode == kThumb2LsrRRI5 ||
- thisLIR->opcode == kThumbLslRRI5 || thisLIR->opcode == kThumbLsrRRI5) {
-
- /* Find next that is not nop and not pseudo code */
- ArmLIR *nextLIR = NULL;
- for(nextLIR = NEXT_LIR(thisLIR);
- nextLIR != tailLIR;
- nextLIR = NEXT_LIR(nextLIR)) {
- if (!nextLIR->flags.isNop && !isPseudoOpcode(nextLIR->opcode)) {
- break;
- }
- }
-
- if(nextLIR == tailLIR) {
- return;
- }
-
- if(nextLIR->opcode == kThumb2AddRRR &&
- nextLIR->operands[3] == 0 &&
- (nextLIR->operands[1] == thisLIR->operands[0] ||
- nextLIR->operands[2] == thisLIR->operands[0])) {
-
- /*
- * Found lsl/lsr & add, use barrel shifter for add instead
- *
- * (1) Normal case
- * [lsl/lsr] r9, r1, #x
- * [add] r0, r2, r9
- *
- * (2) Changing place of args for add
- * [lsl/lsr] r9, r1, #x
- * [add] r0, r9, r2
- *
- * (3) Using r1 and r1 shifted as args for add
- * [lsl/lsr] r9, r1, #x
- * [add] r0, r1, r9
- *
- * (4) Using r1 and r1 shifted as args for add, variant 2
- * [lsl/lsr] r9, r1, #x
- * [add] r0, r9, r1
- *
- * Result:
- * [add] rDest, rSrc1, rSrc2, [lsl/lsr] x
- */
-
- int type = kArmLsl;
- if(thisLIR->opcode == kThumb2LsrRRI5 || thisLIR->opcode == kThumbLsrRRI5) {
- type = kArmLsr;
- }
-
- /* For most cases keep original rSrc1 */
- int rSrc1 = nextLIR->operands[1];
-
- if(thisLIR->operands[0] == nextLIR->operands[1]) {
- /* Case 2 & 4: move original rSrc2 to rScr1 since
- reg to be shifted need to be in rSrc2 */
- rSrc1 = nextLIR->operands[2];
- }
-
- /* Reg to be shifted need to be in rSrc2 */
- int rSrc2 = thisLIR->operands[1];
-
- /* Encode type of shift and amount */
- int shift = ((thisLIR->operands[2] & 0x1f) << 2) | type;
-
- /* Keep rDest, but change rSrc1, rSrc2 and use shift */
- ArmLIR* newLIR = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
- newLIR->opcode = nextLIR->opcode;
- newLIR->operands[0] = nextLIR->operands[0];
- newLIR->operands[1] = rSrc1;
- newLIR->operands[2] = rSrc2;
- newLIR->operands[3] = shift;
- dvmCompilerSetupResourceMasks(newLIR);
- dvmCompilerInsertLIRBefore((LIR *) nextLIR, (LIR *) newLIR);
-
- thisLIR->flags.isNop = true;
- nextLIR->flags.isNop = true;
-
- /*
- * Avoid looping through nops already identified.
- * Continue directly after the updated instruction
- * instead.
- */
- thisLIR = nextLIR;
- }
- }
- }
-}
-
-/*
- * Find all vmul and vadd that can be replaced with a vmla
- */
-static void applyMultiplyArithmeticOpts(CompilationUnit *cUnit,
- ArmLIR *headLIR,
- ArmLIR *tailLIR) {
- ArmLIR *thisLIR = NULL;
-
- for (thisLIR = headLIR;
- thisLIR != tailLIR;
- thisLIR = NEXT_LIR(thisLIR)) {
-
- if(thisLIR->opcode == kThumb2Vmuld && !thisLIR->flags.isNop) {
-
- /* Find next that is not nop and not pseudo code */
- ArmLIR *nextLIR = NULL;
- for(nextLIR = NEXT_LIR(thisLIR);
- nextLIR != tailLIR;
- nextLIR = NEXT_LIR(nextLIR)) {
- if (!nextLIR->flags.isNop && !isPseudoOpcode(nextLIR->opcode)) {
- break;
- }
- }
-
- if(nextLIR == tailLIR) {
- return;
- }
-
- if(nextLIR->opcode == kThumb2Vaddd &&
- nextLIR->operands[0] == nextLIR->operands[1] &&
- nextLIR->operands[2] == thisLIR->operands[0]) {
-
- /*
- * Found vmuld & vadd, use vmla.f64 instead
- *
- * vmuld d9, d9, d10
- * vaddd d8, d8, d9
- *
- * Result:
- * vmla.f64 d8, d9, d10
- */
-
- ArmLIR* newLIR = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
- newLIR->opcode = kThumb2Vmlad;
- newLIR->operands[0] = nextLIR->operands[0];
- newLIR->operands[1] = thisLIR->operands[1];
- newLIR->operands[2] = thisLIR->operands[2];
- dvmCompilerSetupResourceMasks(newLIR);
- dvmCompilerInsertLIRBefore((LIR *) nextLIR, (LIR *) newLIR);
-
- thisLIR->flags.isNop = true;
- nextLIR->flags.isNop = true;
-
- /*
- * Avoid looping through nops already identified.
- * Continue directly after the updated instruction
- * instead.
- */
- thisLIR = nextLIR;
- }
- }
- }
-}
-
void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR,
LIR *tailLIR)
{
@@ -633,10 +463,4 @@ void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR,
if (!(gDvmJit.disableOpt & (1 << kLoadHoisting))) {
applyLoadHoisting(cUnit, (ArmLIR *) headLIR, (ArmLIR *) tailLIR);
}
- if (!(gDvmJit.disableOpt & (1 << kShiftArithmetic))) {
- applyShiftArithmeticOpts(cUnit, (ArmLIR *) headLIR, (ArmLIR* ) tailLIR);
- }
- if (!(gDvmJit.disableOpt & (1 << kMultiplyArithmetic))) {
- applyMultiplyArithmeticOpts(cUnit, (ArmLIR *) headLIR, (ArmLIR* ) tailLIR);
- }
}
diff --git a/vm/compiler/codegen/arm/Thumb/Gen.cpp b/vm/compiler/codegen/arm/Thumb/Gen.cpp
index ea42d519b..abc4420f7 100644
--- a/vm/compiler/codegen/arm/Thumb/Gen.cpp
+++ b/vm/compiler/codegen/arm/Thumb/Gen.cpp
@@ -274,152 +274,3 @@ static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit,
// to do a regular multiply.
opRegRegImm(cUnit, kOpMul, rlResult.lowReg, rlSrc.lowReg, lit);
}
-
-static void genMultiplyByShiftAndReverseSubtract(CompilationUnit *cUnit,
- RegLocation rlSrc, RegLocation rlResult, int lit)
-{
- int tReg = dvmCompilerAllocTemp(cUnit);
- opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1));
- opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
-}
-
-/*
- * Generate array load
- */
-static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size,
- RegLocation rlArray, RegLocation rlIndex,
- RegLocation rlDest, int scale)
-{
- RegisterClass regClass = dvmCompilerRegClassBySize(size);
- int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
- int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
- RegLocation rlResult;
- rlArray = loadValue(cUnit, rlArray, kCoreReg);
- rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
- int regPtr;
-
- /* null object? */
- ArmLIR * pcrLabel = NULL;
-
- if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
- pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
- rlArray.lowReg, mir->offset, NULL);
- }
-
- regPtr = dvmCompilerAllocTemp(cUnit);
-
- if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = dvmCompilerAllocTemp(cUnit);
- /* Get len */
- loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- /* regPtr -> array data */
- opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
- genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
- pcrLabel);
- dvmCompilerFreeTemp(cUnit, regLen);
- } else {
- /* regPtr -> array data */
- opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
- }
- if ((size == kLong) || (size == kDouble)) {
- if (scale) {
- int rNewIndex = dvmCompilerAllocTemp(cUnit);
- opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
- opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
- dvmCompilerFreeTemp(cUnit, rNewIndex);
- } else {
- opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
- }
- rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
-
- HEAP_ACCESS_SHADOW(true);
- loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
- HEAP_ACCESS_SHADOW(false);
-
- dvmCompilerFreeTemp(cUnit, regPtr);
- storeValueWide(cUnit, rlDest, rlResult);
- } else {
- rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
-
- HEAP_ACCESS_SHADOW(true);
- loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
- scale, size);
- HEAP_ACCESS_SHADOW(false);
-
- dvmCompilerFreeTemp(cUnit, regPtr);
- storeValue(cUnit, rlDest, rlResult);
- }
-}
-
-/*
- * Generate array store
- *
- */
-static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
- RegLocation rlArray, RegLocation rlIndex,
- RegLocation rlSrc, int scale)
-{
- RegisterClass regClass = dvmCompilerRegClassBySize(size);
- int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
- int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
-
- int regPtr;
- rlArray = loadValue(cUnit, rlArray, kCoreReg);
- rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
-
- if (dvmCompilerIsTemp(cUnit, rlArray.lowReg)) {
- dvmCompilerClobber(cUnit, rlArray.lowReg);
- regPtr = rlArray.lowReg;
- } else {
- regPtr = dvmCompilerAllocTemp(cUnit);
- genRegCopy(cUnit, regPtr, rlArray.lowReg);
- }
-
- /* null object? */
- ArmLIR * pcrLabel = NULL;
-
- if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
- pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
- mir->offset, NULL);
- }
-
- if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = dvmCompilerAllocTemp(cUnit);
- //NOTE: max live temps(4) here.
- /* Get len */
- loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
- genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
- pcrLabel);
- dvmCompilerFreeTemp(cUnit, regLen);
- } else {
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
- }
- /* at this point, regPtr points to array, 2 live temps */
- if ((size == kLong) || (size == kDouble)) {
- //TODO: need specific wide routine that can handle fp regs
- if (scale) {
- int rNewIndex = dvmCompilerAllocTemp(cUnit);
- opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
- opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
- } else {
- opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
- }
- rlSrc = loadValueWide(cUnit, rlSrc, regClass);
-
- HEAP_ACCESS_SHADOW(true);
- storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
- HEAP_ACCESS_SHADOW(false);
-
- dvmCompilerFreeTemp(cUnit, regPtr);
- } else {
- rlSrc = loadValue(cUnit, rlSrc, regClass);
-
- HEAP_ACCESS_SHADOW(true);
- storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
- scale, size);
- HEAP_ACCESS_SHADOW(false);
- }
-}
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.cpp b/vm/compiler/codegen/arm/Thumb2/Factory.cpp
index e062369c7..b9265e823 100644
--- a/vm/compiler/codegen/arm/Thumb2/Factory.cpp
+++ b/vm/compiler/codegen/arm/Thumb2/Factory.cpp
@@ -1153,21 +1153,11 @@ static void storePair(CompilationUnit *cUnit, int base, int lowReg, int highReg)
storeBaseDispWide(cUnit, base, 0, lowReg, highReg);
}
-static void storePair(CompilationUnit *cUnit, int base, int displacement, int lowReg, int highReg)
-{
- storeBaseDispWide(cUnit, base, displacement, lowReg, highReg);
-}
-
static void loadPair(CompilationUnit *cUnit, int base, int lowReg, int highReg)
{
loadBaseDispWide(cUnit, NULL, base, 0, lowReg, highReg, INVALID_SREG);
}
-static void loadPair(CompilationUnit *cUnit, int base, int displacement, int lowReg, int highReg)
-{
- loadBaseDispWide(cUnit, NULL, base, displacement, lowReg, highReg, INVALID_SREG);
-}
-
/*
* Generate a register comparison to an immediate and branch. Caller
* is responsible for setting branch target field.
diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.cpp b/vm/compiler/codegen/arm/Thumb2/Gen.cpp
index 432e7bcaf..ea6454779 100644
--- a/vm/compiler/codegen/arm/Thumb2/Gen.cpp
+++ b/vm/compiler/codegen/arm/Thumb2/Gen.cpp
@@ -307,9 +307,10 @@ static void genMonitorExit(CompilationUnit *cUnit, MIR *mir)
// Is lock unheld on lock or held by us (==threadId) on unlock?
opRegRegImm(cUnit, kOpAnd, r7, r2,
(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
+ opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner
newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
LW_LOCK_OWNER_SHIFT - 1);
- opRegRegRegShift(cUnit, kOpSub, r2, r2, r3, encodeShift(kArmLsl, LW_LOCK_OWNER_SHIFT)); // Align owner
+ opRegReg(cUnit, kOpSub, r2, r3);
hopBranch = opCondBranch(cUnit, kArmCondNe);
dvmCompilerGenMemBarrier(cUnit, kSY);
storeWordDisp(cUnit, r1, offsetof(Object, lock), r7);
@@ -452,153 +453,3 @@ static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit,
opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
}
}
-
-static void genMultiplyByShiftAndReverseSubtract(CompilationUnit *cUnit,
- RegLocation rlSrc, RegLocation rlResult, int lit)
-{
- newLIR4(cUnit, kThumb2RsbRRR, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg,
- encodeShift(kArmLsl, lit));
-}
-
-/*
- * Generate array load.
- * For wide array access using scale, combine add with shift.
- * When using offset, use ldr instruction with offset capabilities.
- */
-static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size,
- RegLocation rlArray, RegLocation rlIndex,
- RegLocation rlDest, int scale)
-{
- RegisterClass regClass = dvmCompilerRegClassBySize(size);
- int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
- int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
- RegLocation rlResult;
- rlArray = loadValue(cUnit, rlArray, kCoreReg);
- rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
- int regPtr;
-
- /* null object? */
- ArmLIR * pcrLabel = NULL;
-
- if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
- pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
- rlArray.lowReg, mir->offset, NULL);
- }
-
- regPtr = dvmCompilerAllocTemp(cUnit);
-
- if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = dvmCompilerAllocTemp(cUnit);
- /* Get len */
- loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
- pcrLabel);
- dvmCompilerFreeTemp(cUnit, regLen);
- }
- if ((size == kLong) || (size == kDouble)) {
- int rNewIndex = dvmCompilerAllocTemp(cUnit);
- if (scale) {
- /* Combine add with shift */
- opRegRegRegShift(cUnit, kOpAdd, rNewIndex, rlArray.lowReg,
- rlIndex.lowReg, encodeShift(kArmLsl, scale));
- } else {
- opRegRegReg(cUnit, kOpAdd, rNewIndex, regPtr, rlIndex.lowReg);
- }
- rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
-
- HEAP_ACCESS_SHADOW(true);
- /* Use data offset */
- loadPair(cUnit, rNewIndex, dataOffset, rlResult.lowReg, rlResult.highReg);
- HEAP_ACCESS_SHADOW(false);
-
- dvmCompilerFreeTemp(cUnit, rNewIndex);
- dvmCompilerFreeTemp(cUnit, regPtr);
- storeValueWide(cUnit, rlDest, rlResult);
- } else {
- /* regPtr -> array data */
- opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
-
- rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
-
- HEAP_ACCESS_SHADOW(true);
- loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
- scale, size);
- HEAP_ACCESS_SHADOW(false);
-
- dvmCompilerFreeTemp(cUnit, regPtr);
- storeValue(cUnit, rlDest, rlResult);
- }
-}
-
-/*
- * Generate array store.
- * For wide array access using scale, combine add with shift.
- * When using offset, use str instruction with offset capabilities.
- */
-static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
- RegLocation rlArray, RegLocation rlIndex,
- RegLocation rlSrc, int scale)
-{
- RegisterClass regClass = dvmCompilerRegClassBySize(size);
- int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
- int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
-
- int regPtr;
- rlArray = loadValue(cUnit, rlArray, kCoreReg);
- rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
-
- if (dvmCompilerIsTemp(cUnit, rlArray.lowReg)) {
- dvmCompilerClobber(cUnit, rlArray.lowReg);
- regPtr = rlArray.lowReg;
- } else {
- regPtr = dvmCompilerAllocTemp(cUnit);
- genRegCopy(cUnit, regPtr, rlArray.lowReg);
- }
-
- /* null object? */
- ArmLIR * pcrLabel = NULL;
-
- if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
- pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
- mir->offset, NULL);
- }
-
- if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = dvmCompilerAllocTemp(cUnit);
- //NOTE: max live temps(4) here.
- /* Get len */
- loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
- pcrLabel);
- dvmCompilerFreeTemp(cUnit, regLen);
- }
- /* at this point, regPtr points to array, 2 live temps */
- if ((size == kLong) || (size == kDouble)) {
- //TODO: need specific wide routine that can handle fp regs
- int rNewIndex = dvmCompilerAllocTemp(cUnit);
- if (scale) {
- opRegRegRegShift(cUnit, kOpAdd, rNewIndex, rlArray.lowReg,
- rlIndex.lowReg, encodeShift(kArmLsl, scale));
- } else {
- opRegRegReg(cUnit, kOpAdd, rNewIndex, regPtr, rlIndex.lowReg);
- }
- rlSrc = loadValueWide(cUnit, rlSrc, regClass);
-
- HEAP_ACCESS_SHADOW(true);
- storePair(cUnit, rNewIndex, dataOffset, rlSrc.lowReg, rlSrc.highReg);
- HEAP_ACCESS_SHADOW(false);
-
- dvmCompilerFreeTemp(cUnit, rNewIndex);
- dvmCompilerFreeTemp(cUnit, regPtr);
- } else {
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
-
- rlSrc = loadValue(cUnit, rlSrc, regClass);
-
- HEAP_ACCESS_SHADOW(true);
- storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
- scale, size);
- HEAP_ACCESS_SHADOW(false);
- }
-}