author     buzbee <buzbee@google.com>  2010-12-15 16:32:35 -0800
committer  buzbee <buzbee@google.com>  2010-12-17 12:13:59 -0800
commit     2e152baec01433de9c63633ebc6f4adf1cea3a87 (patch)
tree       ab5329129870bdd66296d776374d9e4bed05e8e6
parent     08c2f9fc9cd7c1cb36c31d6f15b4d13a7cc15432 (diff)
[JIT] Trace profiling support
In preparation for method compilation, this CL causes all traces to
include two entry points: profiling and non-profiling. For now, the
profiling entry will only be used if dalvik is run with -Xjitprofile,
and largely works like it did before. The difference is that profiling
support no longer requires the "assert" build - it's always there now.

This will enable us to do a form of sampling profiling of traces in
order to identify hot methods or hot trace groups, while keeping the
overhead low by only switching profiling on periodically.

To turn the periodic profiling on and off, we simply unchain all
existing translations and set the appropriate global profile state.
The underlying translation lookup and chaining utilities will examine
the profile state to determine which entry point to use (i.e. -
profiling or non-profiling) while the traces naturally rechain during
further execution.

Change-Id: I9ee33e69e33869b9fab3a57e88f9bc524175172b
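The dual-entry scheme is easiest to see from the caller's side: every
translation now begins with a small profiling prefix, and a translation
lookup returns either the prefix address or the address just past it,
depending on the global profile state. Below is a minimal sketch of that
selection, assuming the names introduced in this CL (TraceProfilingModes
and profileOffset appear in the hunks that follow); the helper entryFor()
itself is hypothetical - the real logic lives in dvmJitGetCodeAddr() in
vm/interp/Jit.c.

    /* Sketch only: choosing a trace entry point from the profile state. */
    static void *entryFor(void *codeAddress, int profileOffset,
                          TraceProfilingModes mode)
    {
        /* kTraceProfilingContinuous is the first "on" state (Globals.h) */
        int profiling = (mode >= kTraceProfilingContinuous);
        /* Profiling: enter at the prefix so the counter code runs.
         * Not profiling: skip the prefix and fall into the code body. */
        return (char *)codeAddress + (profiling ? 0 : profileOffset);
    }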
-rw-r--r--  vm/Globals.h                                                 | 20
-rw-r--r--  vm/Init.c                                                    |  2
-rw-r--r--  vm/compiler/Compiler.c                                       | 38
-rw-r--r--  vm/compiler/Compiler.h                                       |  9
-rw-r--r--  vm/compiler/CompilerIR.h                                     |  2
-rw-r--r--  vm/compiler/Frontend.c                                       | 20
-rw-r--r--  vm/compiler/codegen/arm/ArmLIR.h                             |  2
-rw-r--r--  vm/compiler/codegen/arm/Assemble.c                           | 88
-rw-r--r--  vm/compiler/codegen/arm/CodegenDriver.c                      | 69
-rw-r--r--  vm/compiler/codegen/arm/Thumb/Gen.c                          | 56
-rw-r--r--  vm/compiler/codegen/arm/Thumb2/Gen.c                         | 53
-rw-r--r--  vm/compiler/template/armv5te-vfp/TemplateOpList.h            |  1
-rw-r--r--  vm/compiler/template/armv5te/TEMPLATE_PERIODIC_PROFILING.S   | 26
-rw-r--r--  vm/compiler/template/armv5te/TemplateOpList.h                |  1
-rw-r--r--  vm/compiler/template/armv5te/footer.S                        |  2
-rw-r--r--  vm/compiler/template/armv7-a-neon/TemplateOpList.h           |  1
-rw-r--r--  vm/compiler/template/armv7-a/TemplateOpList.h                |  1
-rw-r--r--  vm/compiler/template/config-armv5te-vfp                      |  1
-rw-r--r--  vm/compiler/template/config-armv7-a                          |  1
-rw-r--r--  vm/compiler/template/config-armv7-a-neon                     |  1
-rw-r--r--  vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S   | 34
-rw-r--r--  vm/compiler/template/out/CompilerTemplateAsm-armv5te.S       | 34
-rw-r--r--  vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S  | 34
-rw-r--r--  vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S       | 34
-rw-r--r--  vm/interp/InterpDefs.h                                       | 19
-rw-r--r--  vm/interp/Jit.c                                              | 96
-rw-r--r--  vm/interp/Jit.h                                              | 32
-rw-r--r--  vm/mterp/Mterp.c                                             |  1
-rw-r--r--  vm/mterp/common/asm-constants.h                              | 10
29 files changed, 534 insertions(+), 154 deletions(-)
diff --git a/vm/Globals.h b/vm/Globals.h
index 25c485f57..e761081db 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -682,6 +682,14 @@ extern struct DvmGlobals gDvm;
#if defined(WITH_JIT)
+/* Trace profiling modes. Ordering matters - off states before on states */
+typedef enum TraceProfilingModes {
+ kTraceProfilingDisabled = 0, // Not profiling
+ kTraceProfilingPeriodicOff = 1, // Periodic profiling, off phase
+ kTraceProfilingContinuous = 2, // Always profiling
+ kTraceProfilingPeriodicOn = 3 // Periodic profiling, on phase
+} TraceProfilingModes;
+
/*
* Exiting the compiled code w/o chaining will incur overhead to look up the
* target in the code cache which is extra work only when JIT is enabled. So
@@ -720,9 +728,12 @@ struct DvmJitGlobals {
* are stored in each thread. */
struct JitEntry *pJitEntryTable;
- /* Array of profile threshold counters */
+ /* Array of compilation trigger threshold counters */
unsigned char *pProfTable;
+ /* Trace profiling counters */
+ struct JitTraceProfCounters *pJitTraceProfCounters;
+
/* Copy of pProfTable used for temporarily disabling the Jit */
unsigned char *pProfTableCopy;
@@ -801,8 +812,11 @@ struct DvmJitGlobals {
/* Flag to dump all compiled code */
bool printMe;
- /* Flag to count trace execution */
- bool profile;
+ /* Trace profiling mode */
+ TraceProfilingModes profileMode;
+
+ /* Periodic trace profiling countdown timer */
+ int profileCountdown;
/* Vector to disable selected optimizations */
int disableOpt;
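The "off states before on states" ordering in TraceProfilingModes is
load-bearing: vm/interp/Jit.c later tests
profileMode >= kTraceProfilingContinuous to decide whether to hand out
the profiling entry point. A one-line restatement of the invariant the
ordering buys (the helper name is invented here):

    /* kTraceProfilingDisabled (0) and kTraceProfilingPeriodicOff (1)
     * sort below kTraceProfilingContinuous (2) and
     * kTraceProfilingPeriodicOn (3), so a single compare answers
     * "is profiling currently on?". */
    static bool traceProfilingActive(TraceProfilingModes mode)
    {
        return mode >= kTraceProfilingContinuous;
    }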
diff --git a/vm/Init.c b/vm/Init.c
index 741799db0..cfd6be7b3 100644
--- a/vm/Init.c
+++ b/vm/Init.c
@@ -949,7 +949,7 @@ static int dvmProcessOptions(int argc, const char* const argv[],
} else if (strncmp(argv[i], "-Xjitverbose", 12) == 0) {
gDvmJit.printMe = true;
} else if (strncmp(argv[i], "-Xjitprofile", 12) == 0) {
- gDvmJit.profile = true;
+ gDvmJit.profileMode = kTraceProfilingContinuous;
} else if (strncmp(argv[i], "-Xjitdisableopt", 15) == 0) {
/* Disable selected optimizations */
if (argv[i][15] == ':') {
diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c
index c8ff62ee3..adb58dd4b 100644
--- a/vm/compiler/Compiler.c
+++ b/vm/compiler/Compiler.c
@@ -332,6 +332,7 @@ static bool compilerThreadStartup(void)
{
JitEntry *pJitTable = NULL;
unsigned char *pJitProfTable = NULL;
+ JitTraceProfCounters *pJitTraceProfCounters = NULL;
unsigned int i;
if (!dvmCompilerArchInit())
@@ -398,6 +399,15 @@ static bool compilerThreadStartup(void)
/* Is chain field wide enough for termination pattern? */
assert(pJitTable[0].u.info.chain == gDvmJit.jitTableSize);
+ /* Allocate the trace profiling structure */
+ pJitTraceProfCounters = (JitTraceProfCounters*)
+ calloc(1, sizeof(*pJitTraceProfCounters));
+ if (!pJitTraceProfCounters) {
+ LOGE("jit trace prof counters allocation failed\n");
+ dvmUnlockMutex(&gDvmJit.tableLock);
+ goto fail;
+ }
+
gDvmJit.pJitEntryTable = pJitTable;
gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
gDvmJit.jitTableEntriesUsed = 0;
@@ -409,6 +419,7 @@ static bool compilerThreadStartup(void)
*/
gDvmJit.pProfTable = dvmDebuggerOrProfilerActive() ? NULL : pJitProfTable;
gDvmJit.pProfTableCopy = pJitProfTable;
+ gDvmJit.pJitTraceProfCounters = pJitTraceProfCounters;
dvmUnlockMutex(&gDvmJit.tableLock);
/* Signal running threads to refresh their cached pJitTable pointers */
@@ -620,27 +631,19 @@ static void *compilerThreadStart(void *arg)
if (gDvmJit.haltCompilerThread) {
LOGD("Compiler shutdown in progress - discarding request");
} else if (!gDvmJit.codeCacheFull) {
- bool compileOK = false;
jmp_buf jmpBuf;
work.bailPtr = &jmpBuf;
bool aborted = setjmp(jmpBuf);
if (!aborted) {
- compileOK = dvmCompilerDoWork(&work);
- }
- if (aborted || !compileOK) {
-#if 0 // for x86 JIT testing
- dvmJitSetCodeAddr(work.pc,
- dvmCompilerGetInterpretTemplate(),
- work.result.instructionSet);
-#endif
- dvmCompilerArenaReset();
- } else if (!work.result.discardResult &&
- work.result.codeAddress) {
- /* Make sure that proper code addr is installed */
- assert(work.result.codeAddress != NULL);
- dvmJitSetCodeAddr(work.pc, work.result.codeAddress,
- work.result.instructionSet);
+ bool codeCompiled = dvmCompilerDoWork(&work);
+ if (codeCompiled && !work.result.discardResult &&
+ work.result.codeAddress) {
+ dvmJitSetCodeAddr(work.pc, work.result.codeAddress,
+ work.result.instructionSet,
+ work.result.profileCodeSize);
+ }
}
+ dvmCompilerArenaReset();
}
free(work.info);
#if defined(WITH_JIT_TUNING)
@@ -697,7 +700,8 @@ void dvmCompilerShutdown(void)
gDvmJit.pProfTable = NULL;
gDvmJit.pProfTableCopy = NULL;
- if (gDvm.verboseShutdown) {
+ if (gDvm.verboseShutdown ||
+ gDvmJit.profileMode == kTraceProfilingContinuous) {
dvmCompilerDumpStats();
while (gDvmJit.compilerQueueLength)
sleep(5);
diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h
index 0a43df3c9..cd9d21bb3 100644
--- a/vm/compiler/Compiler.h
+++ b/vm/compiler/Compiler.h
@@ -45,15 +45,8 @@
#define COMPILER_TRACE_CHAINING(X)
/* Macro to change the permissions applied to a chunk of the code cache */
-#if !defined(WITH_JIT_TUNING)
#define PROTECT_CODE_CACHE_ATTRS (PROT_READ | PROT_EXEC)
#define UNPROTECT_CODE_CACHE_ATTRS (PROT_READ | PROT_EXEC | PROT_WRITE)
-#else
-/* When doing JIT profiling always grant the write permission */
-#define PROTECT_CODE_CACHE_ATTRS (PROT_READ | PROT_EXEC | \
- (gDvmJit.profile ? PROT_WRITE : 0))
-#define UNPROTECT_CODE_CACHE_ATTRS (PROT_READ | PROT_EXEC | PROT_WRITE)
-#endif
/* Acquire the lock before removing PROT_WRITE from the specified mem region */
#define UNPROTECT_CODE_CACHE(addr, size) \
@@ -90,6 +83,7 @@ typedef enum JitInstructionSetType {
typedef struct JitTranslationInfo {
void *codeAddress;
JitInstructionSetType instructionSet;
+ int profileCodeSize;
bool discardResult; // Used for debugging divergence and IC patching
bool methodCompilationAborted; // Cannot compile the whole method
Thread *requestingThread; // For debugging purpose
@@ -100,6 +94,7 @@ typedef enum WorkOrderKind {
kWorkOrderMethod = 1, // Work is to compile a whole method
kWorkOrderTrace = 2, // Work is to compile code fragment(s)
kWorkOrderTraceDebug = 3, // Work is to compile/debug code fragment(s)
+ kWorkOrderProfileMode = 4, // Change profiling mode
} WorkOrderKind;
typedef struct CompilerWorkOrder {
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
index caf6fa617..54d41a5c5 100644
--- a/vm/compiler/CompilerIR.h
+++ b/vm/compiler/CompilerIR.h
@@ -206,11 +206,11 @@ typedef struct CompilationUnit {
void *baseAddr;
bool printMe;
bool allSingleStep;
- bool executionCount; // Add code to count trace executions
bool hasLoop; // Contains a loop
bool hasInvoke; // Contains an invoke instruction
bool heapMemOp; // Mark mem ops for self verification
bool wholeMethod;
+ int profileCodeSize; // Size of the profile prefix in bytes
int numChainingCells[kChainingCellGap];
LIR *firstChainingLIR[kChainingCellGap];
LIR *chainingCellBottom;
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
index 95ef026c0..915e5f3fe 100644
--- a/vm/compiler/Frontend.c
+++ b/vm/compiler/Frontend.c
@@ -458,9 +458,6 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
/* Initialize the printMe flag */
cUnit.printMe = gDvmJit.printMe;
- /* Initialize the profile flag */
- cUnit.executionCount = gDvmJit.profile;
-
/* Setup the method */
cUnit.method = desc->method;
@@ -634,6 +631,7 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
for (blockId = 0; blockId < blockList->numUsed; blockId++) {
curBB = (BasicBlock *) dvmGrowableListGetElement(blockList, blockId);
MIR *lastInsn = curBB->lastMIRInsn;
+ BasicBlock *backwardCell;
/* Skip empty blocks */
if (lastInsn == NULL) {
continue;
@@ -708,25 +706,11 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
exitBB->needFallThroughBranch = true;
loopBranch->taken = exitBB;
-#if defined(WITH_SELF_VERIFICATION)
- BasicBlock *backwardCell =
+ backwardCell =
dvmCompilerNewBB(kChainingCellBackwardBranch, numBlocks++);
dvmInsertGrowableList(blockList, (intptr_t) backwardCell);
backwardCell->startOffset = entryCodeBB->startOffset;
loopBranch->fallThrough = backwardCell;
-#elif defined(WITH_JIT_TUNING)
- if (gDvmJit.profile) {
- BasicBlock *backwardCell =
- dvmCompilerNewBB(kChainingCellBackwardBranch, numBlocks++);
- dvmInsertGrowableList(blockList, (intptr_t) backwardCell);
- backwardCell->startOffset = entryCodeBB->startOffset;
- loopBranch->fallThrough = backwardCell;
- } else {
- loopBranch->fallThrough = entryCodeBB;
- }
-#else
- loopBranch->fallThrough = entryCodeBB;
-#endif
/* Create the chaining cell as the fallthrough of the exit block */
exitChainingCell = dvmCompilerNewBB(kChainingCellNormal,
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index 4f3434dec..437c2ed96 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -619,6 +619,8 @@ typedef enum ArmOpcode {
kThumb2Bfc, /* bfc [11110011011011110] [0] imm3[14-12]
rd[11-8] imm2[7-6] [0] msb[4-0] */
kThumb2Dmb, /* dmb [1111001110111111100011110101] option[3-0] */
+ kThumb2LdrPcReln12, /* ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12]
+ imm12[11-0] */
kArmLast,
} ArmOpcode;
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 4154387ba..b5c04f5d0 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -876,6 +876,11 @@ ArmEncodingMap EncodingMap[kArmLast] = {
kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_UNARY_OP,
"dmb","#!0B",2),
+ ENCODING_MAP(kThumb2LdrPcReln12, 0xf85f0000,
+ kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1,
+ IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
+ "ldr", "r!0d, [rpc, -#!1d]", 2),
};
/*
@@ -1163,21 +1168,21 @@ static void matchSignatureBreakpoint(const CompilationUnit *cUnit,
/*
* Translation layout in the code cache. Note that the codeAddress pointer
* in JitTable will point directly to the code body (field codeAddress). The
- * chain cell offset codeAddress - 2, and (if present) executionCount is at
- * codeAddress - 6.
+ * chain cell offset is at codeAddress - 2, and the address of the trace
+ * profile counter is at codeAddress - 6.
*
* +----------------------------+
- * | Execution count | -> [Optional] 4 bytes
+ * | Trace Profile Counter addr | -> 4 bytes
* +----------------------------+
* +--| Offset to chain cell counts| -> 2 bytes
* | +----------------------------+
- * | | Code body | -> Start address for translation
- * | | | variable in 2-byte chunks
- * | . . (JitTable's codeAddress points here)
+ * | | Trace profile code | <- entry point when profiling
+ * | . - - - - - - - .
+ * | | Code body | <- entry point when not profiling
* | . .
* | | |
* | +----------------------------+
- * | | Chaining Cells | -> 12/16 bytes each, must be 4 byte aligned
+ * | | Chaining Cells | -> 12/16 bytes, 4 byte aligned
* | . .
* | . .
* | | |
@@ -1251,13 +1256,10 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info)
chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG);
/*
- * Replace the CHAIN_CELL_OFFSET_TAG with the real value. If trace
- * profiling is enabled, subtract 4 (occupied by the counter word) from
- * the absolute offset as the value stored in chainCellOffsetLIR is the
- * delta from &chainCellOffsetLIR to &ChainCellCounts.
+ * Adjust the CHAIN_CELL_OFFSET_TAG LIR's offset to remove the
+ * space occupied by the pointer to the trace profiling counter.
*/
- chainCellOffsetLIR->operands[0] =
- gDvmJit.profile ? (chainCellOffset - 4) : chainCellOffset;
+ chainCellOffsetLIR->operands[0] = chainCellOffset - 4;
offset += sizeof(chainCellCounts) + descSize;
@@ -1363,6 +1365,8 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info)
/* If applicable, mark low bit to denote thumb */
if (info->instructionSet != DALVIK_JIT_ARM)
info->codeAddress = (char*)info->codeAddress + 1;
+ /* transfer the size of the profiling code */
+ info->profileCodeSize = cUnit->profileCodeSize;
}
/*
@@ -1836,14 +1840,37 @@ static char *getTraceBase(const JitEntry *p)
(6 + (p->u.info.instructionSet == DALVIK_JIT_ARM ? 0 : 1));
}
+/* Handy function to retrieve the profile count */
+static inline JitTraceCounter_t getProfileCount(const JitEntry *entry)
+{
+ if (entry->dPC == 0 || entry->codeAddress == 0 ||
+ entry->codeAddress == dvmCompilerGetInterpretTemplate())
+ return 0;
+
+ JitTraceCounter_t **p = (JitTraceCounter_t **) getTraceBase(entry);
+
+ return **p;
+}
+
+/* Handy function to reset the profile count */
+static inline void resetProfileCount(const JitEntry *entry)
+{
+ if (entry->dPC == 0 || entry->codeAddress == 0 ||
+ entry->codeAddress == dvmCompilerGetInterpretTemplate())
+ return;
+
+ JitTraceCounter_t **p = (JitTraceCounter_t **) getTraceBase(entry);
+
+ **p = 0;
+}
+
/* Dumps profile info for a single trace */
static int dumpTraceProfile(JitEntry *p, bool silent, bool reset,
unsigned long sum)
{
ChainCellCounts* pCellCounts;
char* traceBase;
- u4* pExecutionCount;
- u4 executionCount;
+ JitTraceCounter_t count;
u2* pCellOffset;
JitTraceDescription *desc;
const Method* method;
@@ -1861,14 +1888,12 @@ static int dumpTraceProfile(JitEntry *p, bool silent, bool reset,
LOGD("TRACEPROFILE 0x%08x 0 INTERPRET_ONLY 0 0", (int)traceBase);
return 0;
}
-
- pExecutionCount = (u4*) (traceBase);
- executionCount = *pExecutionCount;
+ count = getProfileCount(p);
if (reset) {
- *pExecutionCount =0;
+ resetProfileCount(p);
}
if (silent) {
- return executionCount;
+ return count;
}
pCellOffset = (u2*) (traceBase + 4);
pCellCounts = (ChainCellCounts*) ((char *)pCellOffset + *pCellOffset);
@@ -1893,8 +1918,8 @@ static int dumpTraceProfile(JitEntry *p, bool silent, bool reset,
LOGD("TRACEPROFILE 0x%08x % 10d %5.2f%% [%#x(+%d), %d] %s%s;%s",
(int)traceBase,
- executionCount,
- ((float ) executionCount) / sum * 100.0,
+ count,
+ ((float ) count) / sum * 100.0,
desc->trace[0].frag.startOffset,
desc->trace[0].frag.numInsts,
addrToLine.lineNum,
@@ -1919,7 +1944,7 @@ static int dumpTraceProfile(JitEntry *p, bool silent, bool reset,
methodDesc);
}
- return executionCount;
+ return count;
}
/* Create a copy of the trace descriptor of an existing compilation */
@@ -1948,27 +1973,14 @@ JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
return newCopy;
}
-/* Handy function to retrieve the profile count */
-static inline int getProfileCount(const JitEntry *entry)
-{
- if (entry->dPC == 0 || entry->codeAddress == 0 ||
- entry->codeAddress == dvmCompilerGetInterpretTemplate())
- return 0;
-
- u4 *pExecutionCount = (u4 *) getTraceBase(entry);
-
- return *pExecutionCount;
-}
-
-
/* qsort callback function */
static int sortTraceProfileCount(const void *entry1, const void *entry2)
{
const JitEntry *jitEntry1 = (const JitEntry *)entry1;
const JitEntry *jitEntry2 = (const JitEntry *)entry2;
- int count1 = getProfileCount(jitEntry1);
- int count2 = getProfileCount(jitEntry2);
+ JitTraceCounter_t count1 = getProfileCount(jitEntry1);
+ JitTraceCounter_t count2 = getProfileCount(jitEntry2);
return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
}
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 6473edb50..181a128fd 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -3536,7 +3536,6 @@ static void handleHotChainingCell(CompilationUnit *cUnit,
addWordData(cUnit, (int) (cUnit->method->insns + offset), true);
}
-#if defined(WITH_SELF_VERIFICATION) || defined(WITH_JIT_TUNING)
/* Chaining cell for branches that branch back into the same basic block */
static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
unsigned int offset)
@@ -3558,7 +3557,6 @@ static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
addWordData(cUnit, (int) (cUnit->method->insns + offset), true);
}
-#endif
/* Chaining cell for monomorphic method invocations. */
static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
const Method *callee)
@@ -3944,39 +3942,8 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
GrowableListIterator iterator;
dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
- if (cUnit->executionCount) {
- /*
- * Reserve 6 bytes at the beginning of the trace
- * +----------------------------+
- * | execution count (4 bytes) |
- * +----------------------------+
- * | chain cell offset (2 bytes)|
- * +----------------------------+
- * ...and then code to increment the execution
- * count:
- * mov r0, pc @ move adr of "mov r0,pc" + 4 to r0
- * sub r0, #10 @ back up to addr of executionCount
- * ldr r1, [r0]
- * add r1, #1
- * str r1, [r0]
- */
- newLIR1(cUnit, kArm16BitData, 0);
- newLIR1(cUnit, kArm16BitData, 0);
- cUnit->chainCellOffsetLIR =
- (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
- cUnit->headerSize = 6;
- /* Thumb instruction used directly here to ensure correct size */
- newLIR2(cUnit, kThumbMovRR_H2L, r0, rpc);
- newLIR2(cUnit, kThumbSubRI8, r0, 10);
- newLIR3(cUnit, kThumbLdrRRI5, r1, r0, 0);
- newLIR2(cUnit, kThumbAddRI8, r1, 1);
- newLIR3(cUnit, kThumbStrRRI5, r1, r0, 0);
- } else {
- /* Just reserve 2 bytes for the chain cell offset */
- cUnit->chainCellOffsetLIR =
- (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
- cUnit->headerSize = 2;
- }
+ /* Traces start with a profiling entry point. Generate it here */
+ cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
/* Handle the content in each basic block */
for (i = 0; ; i++) {
@@ -4062,7 +4029,6 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
opReg(cUnit, kOpBlx, r1);
}
break;
-#if defined(WITH_SELF_VERIFICATION) || defined(WITH_JIT_TUNING)
case kChainingCellBackwardBranch:
labelList[i].opcode =
kArmPseudoChainingCellBackwardBranch;
@@ -4071,7 +4037,6 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
&chainingListByType[kChainingCellBackwardBranch],
i);
break;
-#endif
default:
break;
}
@@ -4303,12 +4268,10 @@ gen_fallthrough:
case kChainingCellHot:
handleHotChainingCell(cUnit, chainingBlock->startOffset);
break;
-#if defined(WITH_SELF_VERIFICATION) || defined(WITH_JIT_TUNING)
case kChainingCellBackwardBranch:
handleBackwardBranchChainingCell(cUnit,
chainingBlock->startOffset);
break;
-#endif
default:
LOGE("Bad blocktype %d", chainingBlock->blockType);
dvmCompilerAbort(cUnit);
@@ -4342,11 +4305,15 @@ gen_fallthrough:
#endif
}
-/* Accept the work and start compiling */
+/*
+ * Accept the work and start compiling. Returns true if compilation
+ * is attempted.
+ */
bool dvmCompilerDoWork(CompilerWorkOrder *work)
{
JitTraceDescription *desc;
- bool res;
+ bool isCompile;
+ bool success = true;
if (gDvmJit.codeCacheFull) {
return false;
@@ -4354,27 +4321,35 @@ bool dvmCompilerDoWork(CompilerWorkOrder *work)
switch (work->kind) {
case kWorkOrderTrace:
+ isCompile = true;
/* Start compilation with maximally allowed trace length */
desc = (JitTraceDescription *)work->info;
- res = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
- work->bailPtr, 0 /* no hints */);
+ success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
+ work->bailPtr, 0 /* no hints */);
break;
case kWorkOrderTraceDebug: {
bool oldPrintMe = gDvmJit.printMe;
gDvmJit.printMe = true;
+ isCompile = true;
/* Start compilation with maximally allowed trace length */
desc = (JitTraceDescription *)work->info;
- res = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
- work->bailPtr, 0 /* no hints */);
+ success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
+ work->bailPtr, 0 /* no hints */);
gDvmJit.printMe = oldPrintMe;
break;
}
+ case kWorkOrderProfileMode:
+ dvmJitChangeProfileMode((TraceProfilingModes)work->info);
+ isCompile = false;
+ break;
default:
- res = false;
+ isCompile = false;
LOGE("Jit: unknown work order type");
assert(0); // Bail if debug build, discard otherwise
}
- return res;
+ if (!success)
+ work->result.codeAddress = NULL;
+ return isCompile;
}
/* Architectural-specific debugging helpers go here */
diff --git a/vm/compiler/codegen/arm/Thumb/Gen.c b/vm/compiler/codegen/arm/Thumb/Gen.c
index 07f3f092f..b80696595 100644
--- a/vm/compiler/codegen/arm/Thumb/Gen.c
+++ b/vm/compiler/codegen/arm/Thumb/Gen.c
@@ -23,6 +23,62 @@
*/
/*
+ * Reserve 6 bytes at the beginning of the trace
+ * +----------------------------+
+ * | prof count addr (4 bytes) |
+ * +----------------------------+
+ * | chain cell offset (2 bytes)|
+ * +----------------------------+
+ *
+ * ...and then code to increment the execution count.
+ *
+ * For continuous profiling (12 bytes):
+ *
+ * mov r0, pc @ move adr of "mov r0,pc" + 4 to r0
+ * sub r0, #10 @ back up to addr prof count pointer
+ * ldr r0, [r0] @ get address of counter
+ * ldr r1, [r0]
+ * add r1, #1
+ * str r1, [r0]
+ *
+ * For periodic profiling (4 bytes):
+ * call TEMPLATE_PERIODIC_PROFILING
+ *
+ * and return the size (in bytes) of the generated code.
+ */
+
+static int genTraceProfileEntry(CompilationUnit *cUnit)
+{
+ intptr_t addr = (intptr_t)dvmJitNextTraceCounter();
+ assert(__BYTE_ORDER == __LITTLE_ENDIAN);
+ newLIR1(cUnit, kArm16BitData, addr & 0xffff);
+ newLIR1(cUnit, kArm16BitData, (addr >> 16) & 0xffff);
+ cUnit->chainCellOffsetLIR =
+ (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
+ cUnit->headerSize = 6;
+ if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
+ (gDvmJit.profileMode == kTraceProfilingDisabled)) {
+ /* Thumb instruction used directly here to ensure correct size */
+ newLIR2(cUnit, kThumbMovRR_H2L, r0, rpc);
+ newLIR2(cUnit, kThumbSubRI8, r0, 10);
+ newLIR3(cUnit, kThumbLdrRRI5, r0, r0, 0);
+ newLIR3(cUnit, kThumbLdrRRI5, r1, r0, 0);
+ newLIR2(cUnit, kThumbAddRI8, r1, 1);
+ newLIR3(cUnit, kThumbStrRRI5, r1, r0, 0);
+ return 12;
+ } else {
+ int opcode = TEMPLATE_PERIODIC_PROFILING;
+ newLIR2(cUnit, kThumbBlx1,
+ (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
+ (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
+ newLIR2(cUnit, kThumbBlx2,
+ (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
+ (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
+ return 4;
+ }
+}
+
+/*
* Perform a "reg cmp imm" operation and jump to the PCR region if condition
* satisfies.
*/
diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.c b/vm/compiler/codegen/arm/Thumb2/Gen.c
index 0891524f7..f5e1096ea 100644
--- a/vm/compiler/codegen/arm/Thumb2/Gen.c
+++ b/vm/compiler/codegen/arm/Thumb2/Gen.c
@@ -15,13 +15,64 @@
*/
/*
- * This file contains codegen for the Thumb ISA and is intended to be
+ * This file contains codegen for the Thumb2 ISA and is intended to be
* included by:
*
* Codegen-$(TARGET_ARCH_VARIANT).c
*
*/
+/*
+ * Reserve 6 bytes at the beginning of the trace
+ * +----------------------------+
+ * | prof count addr (4 bytes) |
+ * +----------------------------+
+ * | chain cell offset (2 bytes)|
+ * +----------------------------+
+ *
+ * ...and then code to increment the execution count.
+ *
+ * For continuous profiling (10 bytes)
+ * ldr r0, [pc-8] @ get prof count addr [4 bytes]
+ * ldr r1, [r0] @ load counter [2 bytes]
+ * add r1, #1 @ increment [2 bytes]
+ * str r1, [r0] @ store [2 bytes]
+ *
+ * For periodic profiling (4 bytes)
+ * call TEMPLATE_PERIODIC_PROFILING
+ *
+ * and return the size (in bytes) of the generated code.
+ */
+
+static int genTraceProfileEntry(CompilationUnit *cUnit)
+{
+ intptr_t addr = (intptr_t)dvmJitNextTraceCounter();
+ assert(__BYTE_ORDER == __LITTLE_ENDIAN);
+ newLIR1(cUnit, kArm16BitData, addr & 0xffff);
+ newLIR1(cUnit, kArm16BitData, (addr >> 16) & 0xffff);
+ cUnit->chainCellOffsetLIR =
+ (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
+ cUnit->headerSize = 6;
+ if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
+ (gDvmJit.profileMode == kTraceProfilingDisabled)) {
+ /* Thumb[2] instruction used directly here to ensure correct size */
+ newLIR2(cUnit, kThumb2LdrPcReln12, r0, 8);
+ newLIR3(cUnit, kThumbLdrRRI5, r1, r0, 0);
+ newLIR2(cUnit, kThumbAddRI8, r1, 1);
+ newLIR3(cUnit, kThumbStrRRI5, r1, r0, 0);
+ return 10;
+ } else {
+ int opcode = TEMPLATE_PERIODIC_PROFILING;
+ newLIR2(cUnit, kThumbBlx1,
+ (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
+ (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
+ newLIR2(cUnit, kThumbBlx2,
+ (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
+ (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
+ return 4;
+ }
+}
+
static void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest,
RegLocation rlSrc)
{
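In C terms, the 10-byte continuous-profiling prefix generated above is a
double indirection through the word sitting 6 bytes before the profiling
entry (the 4-byte counter address plus the 2-byte chain cell offset); the
pc-relative load reaches that word because Thumb literal loads use the
word-aligned pc, so align(entry + 4, 4) - 8 lands on it. A sketch of the
equivalent operation, with the function name invented here:

    /* What the generated prefix does: fetch this trace's counter address
     * from the word at (profiling entry - 6), then bump the counter. */
    static void profilePrefixEquivalent(char *profilingEntry)
    {
        JitTraceCounter_t **pCounterAddr =
            (JitTraceCounter_t **)(profilingEntry - 6);
        (**pCounterAddr)++;
    }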
diff --git a/vm/compiler/template/armv5te-vfp/TemplateOpList.h b/vm/compiler/template/armv5te-vfp/TemplateOpList.h
index d991bedb0..97addfaf8 100644
--- a/vm/compiler/template/armv5te-vfp/TemplateOpList.h
+++ b/vm/compiler/template/armv5te-vfp/TemplateOpList.h
@@ -57,3 +57,4 @@ JIT_TEMPLATE(STRING_INDEXOF)
JIT_TEMPLATE(INTERPRET)
JIT_TEMPLATE(MONITOR_ENTER)
JIT_TEMPLATE(MONITOR_ENTER_DEBUG)
+JIT_TEMPLATE(PERIODIC_PROFILING)
diff --git a/vm/compiler/template/armv5te/TEMPLATE_PERIODIC_PROFILING.S b/vm/compiler/template/armv5te/TEMPLATE_PERIODIC_PROFILING.S
new file mode 100644
index 000000000..7f7109646
--- /dev/null
+++ b/vm/compiler/template/armv5te/TEMPLATE_PERIODIC_PROFILING.S
@@ -0,0 +1,26 @@
+ /*
+ * Increment profile counter for this trace, and decrement
+ * sample counter. If sample counter goes below zero, turn
+ * off profiling.
+ *
+ * On entry
+ * (lr-11) is address of pointer to counter. Note: the counter
+ * actually exists 10 bytes before the return target, but because
+ * we are arriving from thumb mode, lr will have its low bit set.
+ */
+ ldr r0, [lr,#-11]
+ ldr r1, [rGLUE, #offGlue_pProfileCountdown]
+ ldr r2, [r0] @ get counter
+ ldr r3, [r1] @ get countdown timer
+ add r2, #1 @ increment trace counter
+ subs r3, #1 @ decrement countdown timer
+ blt .L${opcode}_disable_profiling
+ str r2, [r0]
+ str r3, [r1]
+ bx lr
+
+.L${opcode}_disable_profiling:
+ mov r4, lr @ preserve lr
+ ldr r0, .LdvmJitTraceProfilingOff
+ blx r0
+ bx r4
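The -11 in the template's first load decomposes as follows. The caller is
the 4-byte blx pair emitted by genTraceProfileEntry(), which is the first
instruction of the profile prefix, and Thumb-mode calls set the low bit
of lr:

    return target        = lr - 1               (strip the Thumb bit)
                         = profiling entry + 4  (just past the 4-byte blx)
    counter-address word = profiling entry - 6  (4-byte word + 2-byte
                                                 chain cell offset)
                         = (lr - 1) - 4 - 6
                         = lr - 11

which matches the comment's "10 bytes before the return target".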
diff --git a/vm/compiler/template/armv5te/TemplateOpList.h b/vm/compiler/template/armv5te/TemplateOpList.h
index e81383c5b..663e0df68 100644
--- a/vm/compiler/template/armv5te/TemplateOpList.h
+++ b/vm/compiler/template/armv5te/TemplateOpList.h
@@ -42,3 +42,4 @@ JIT_TEMPLATE(STRING_INDEXOF)
JIT_TEMPLATE(INTERPRET)
JIT_TEMPLATE(MONITOR_ENTER)
JIT_TEMPLATE(MONITOR_ENTER_DEBUG)
+JIT_TEMPLATE(PERIODIC_PROFILING)
diff --git a/vm/compiler/template/armv5te/footer.S b/vm/compiler/template/armv5te/footer.S
index ba0335b30..7b35e8a1c 100644
--- a/vm/compiler/template/armv5te/footer.S
+++ b/vm/compiler/template/armv5te/footer.S
@@ -104,6 +104,8 @@
.word dvmMterpCommonExceptionThrown
.LdvmLockObject:
.word dvmLockObject
+.LdvmJitTraceProfilingOff:
+ .word dvmJitTraceProfilingOff
#if defined(WITH_JIT_TUNING)
.LdvmICHitCount:
.word gDvmICHitCount
diff --git a/vm/compiler/template/armv7-a-neon/TemplateOpList.h b/vm/compiler/template/armv7-a-neon/TemplateOpList.h
index d991bedb0..97addfaf8 100644
--- a/vm/compiler/template/armv7-a-neon/TemplateOpList.h
+++ b/vm/compiler/template/armv7-a-neon/TemplateOpList.h
@@ -57,3 +57,4 @@ JIT_TEMPLATE(STRING_INDEXOF)
JIT_TEMPLATE(INTERPRET)
JIT_TEMPLATE(MONITOR_ENTER)
JIT_TEMPLATE(MONITOR_ENTER_DEBUG)
+JIT_TEMPLATE(PERIODIC_PROFILING)
diff --git a/vm/compiler/template/armv7-a/TemplateOpList.h b/vm/compiler/template/armv7-a/TemplateOpList.h
index d991bedb0..97addfaf8 100644
--- a/vm/compiler/template/armv7-a/TemplateOpList.h
+++ b/vm/compiler/template/armv7-a/TemplateOpList.h
@@ -57,3 +57,4 @@ JIT_TEMPLATE(STRING_INDEXOF)
JIT_TEMPLATE(INTERPRET)
JIT_TEMPLATE(MONITOR_ENTER)
JIT_TEMPLATE(MONITOR_ENTER_DEBUG)
+JIT_TEMPLATE(PERIODIC_PROFILING)
diff --git a/vm/compiler/template/config-armv5te-vfp b/vm/compiler/template/config-armv5te-vfp
index 1b02261eb..30b9200a8 100644
--- a/vm/compiler/template/config-armv5te-vfp
+++ b/vm/compiler/template/config-armv5te-vfp
@@ -48,6 +48,7 @@ op-start armv5te-vfp
op TEMPLATE_INTERPRET armv5te
op TEMPLATE_MONITOR_ENTER armv5te
op TEMPLATE_MONITOR_ENTER_DEBUG armv5te
+ op TEMPLATE_PERIODIC_PROFILING armv5te
op-end
diff --git a/vm/compiler/template/config-armv7-a b/vm/compiler/template/config-armv7-a
index be7af31e4..1408ca117 100644
--- a/vm/compiler/template/config-armv7-a
+++ b/vm/compiler/template/config-armv7-a
@@ -48,6 +48,7 @@ op-start armv5te-vfp
op TEMPLATE_INTERPRET armv5te
op TEMPLATE_MONITOR_ENTER armv5te
op TEMPLATE_MONITOR_ENTER_DEBUG armv5te
+ op TEMPLATE_PERIODIC_PROFILING armv5te
op-end
# "helper" code for C; include if you use any of the C stubs (this generates
diff --git a/vm/compiler/template/config-armv7-a-neon b/vm/compiler/template/config-armv7-a-neon
index be7af31e4..1408ca117 100644
--- a/vm/compiler/template/config-armv7-a-neon
+++ b/vm/compiler/template/config-armv7-a-neon
@@ -48,6 +48,7 @@ op-start armv5te-vfp
op TEMPLATE_INTERPRET armv5te
op TEMPLATE_MONITOR_ENTER armv5te
op TEMPLATE_MONITOR_ENTER_DEBUG armv5te
+ op TEMPLATE_PERIODIC_PROFILING armv5te
op-end
# "helper" code for C; include if you use any of the C stubs (this generates
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
index 8efbcaa60..a107b24de 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
@@ -1473,6 +1473,38 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
#endif
ldr pc, .LdvmJitToInterpNoChain
+/* ------------------------------ */
+ .balign 4
+ .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
+dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
+/* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
+ /*
+ * Increment profile counter for this trace, and decrement
+ * sample counter. If sample counter goes below zero, turn
+ * off profiling.
+ *
+ * On entry
+ * (lr-11) is address of pointer to counter. Note: the counter
+ * actually exists 10 bytes before the return target, but because
+ * we are arriving from thumb mode, lr will have its low bit set.
+ */
+ ldr r0, [lr,#-11]
+ ldr r1, [rGLUE, #offGlue_pProfileCountdown]
+ ldr r2, [r0] @ get counter
+ ldr r3, [r1] @ get countdown timer
+ add r2, #1 @ increment trace counter
+ subs r3, #1 @ decrement countdown timer
+ blt .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
+ str r2, [r0]
+ str r3, [r1]
+ bx lr
+
+.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
+ mov r4, lr @ preserve lr
+ ldr r0, .LdvmJitTraceProfilingOff
+ blx r0
+ bx r4
+
.size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: armv5te/footer.S */
/*
@@ -1581,6 +1613,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
.word dvmMterpCommonExceptionThrown
.LdvmLockObject:
.word dvmLockObject
+.LdvmJitTraceProfilingOff:
+ .word dvmJitTraceProfilingOff
#if defined(WITH_JIT_TUNING)
.LdvmICHitCount:
.word gDvmICHitCount
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
index 0df3ae65a..a6a0e9ff8 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
@@ -1204,6 +1204,38 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
#endif
ldr pc, .LdvmJitToInterpNoChain
+/* ------------------------------ */
+ .balign 4
+ .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
+dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
+/* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
+ /*
+ * Increment profile counter for this trace, and decrement
+ * sample counter. If sample counter goes below zero, turn
+ * off profiling.
+ *
+ * On entry
+ * (lr-11) is address of pointer to counter. Note: the counter
+ * actually exists 10 bytes before the return target, but because
+ * we are arriving from thumb mode, lr will have its low bit set.
+ */
+ ldr r0, [lr,#-11]
+ ldr r1, [rGLUE, #offGlue_pProfileCountdown]
+ ldr r2, [r0] @ get counter
+ ldr r3, [r1] @ get countdown timer
+ add r2, #1 @ increment trace counter
+ subs r3, #1 @ decrement countdown timer
+ blt .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
+ str r2, [r0]
+ str r3, [r1]
+ bx lr
+
+.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
+ mov r4, lr @ preserve lr
+ ldr r0, .LdvmJitTraceProfilingOff
+ blx r0
+ bx r4
+
.size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: armv5te/footer.S */
/*
@@ -1312,6 +1344,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
.word dvmMterpCommonExceptionThrown
.LdvmLockObject:
.word dvmLockObject
+.LdvmJitTraceProfilingOff:
+ .word dvmJitTraceProfilingOff
#if defined(WITH_JIT_TUNING)
.LdvmICHitCount:
.word gDvmICHitCount
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
index ee3f8cbec..e4ed30bef 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
@@ -1473,6 +1473,38 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
#endif
ldr pc, .LdvmJitToInterpNoChain
+/* ------------------------------ */
+ .balign 4
+ .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
+dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
+/* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
+ /*
+ * Increment profile counter for this trace, and decrement
+ * sample counter. If sample counter goes below zero, turn
+ * off profiling.
+ *
+ * On entry
+ * (lr-11) is address of pointer to counter. Note: the counter
+ * actually exists 10 bytes before the return target, but because
+ * we are arriving from thumb mode, lr will have its low bit set.
+ */
+ ldr r0, [lr,#-11]
+ ldr r1, [rGLUE, #offGlue_pProfileCountdown]
+ ldr r2, [r0] @ get counter
+ ldr r3, [r1] @ get countdown timer
+ add r2, #1 @ increment trace counter
+ subs r3, #1 @ decrement countdown timer
+ blt .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
+ str r2, [r0]
+ str r3, [r1]
+ bx lr
+
+.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
+ mov r4, lr @ preserve lr
+ ldr r0, .LdvmJitTraceProfilingOff
+ blx r0
+ bx r4
+
.size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: armv5te/footer.S */
/*
@@ -1581,6 +1613,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
.word dvmMterpCommonExceptionThrown
.LdvmLockObject:
.word dvmLockObject
+.LdvmJitTraceProfilingOff:
+ .word dvmJitTraceProfilingOff
#if defined(WITH_JIT_TUNING)
.LdvmICHitCount:
.word gDvmICHitCount
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
index 3875f5a24..fc26b3a95 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
@@ -1473,6 +1473,38 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
#endif
ldr pc, .LdvmJitToInterpNoChain
+/* ------------------------------ */
+ .balign 4
+ .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
+dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
+/* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
+ /*
+ * Increment profile counter for this trace, and decrement
+ * sample counter. If sample counter goes below zero, turn
+ * off profiling.
+ *
+ * On entry
+ * (lr-11) is address of pointer to counter. Note: the counter
+ * actually exists 10 bytes before the return target, but because
+ * we are arriving from thumb mode, lr will have its low bit set.
+ */
+ ldr r0, [lr,#-11]
+ ldr r1, [rGLUE, #offGlue_pProfileCountdown]
+ ldr r2, [r0] @ get counter
+ ldr r3, [r1] @ get countdown timer
+ add r2, #1 @ increment trace counter
+ subs r3, #1 @ decrement countdown timer
+ blt .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
+ str r2, [r0]
+ str r3, [r1]
+ bx lr
+
+.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
+ mov r4, lr @ preserve lr
+ ldr r0, .LdvmJitTraceProfilingOff
+ blx r0
+ bx r4
+
.size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: armv5te/footer.S */
/*
@@ -1581,6 +1613,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
.word dvmMterpCommonExceptionThrown
.LdvmLockObject:
.word dvmLockObject
+.LdvmJitTraceProfilingOff:
+ .word dvmJitTraceProfilingOff
#if defined(WITH_JIT_TUNING)
.LdvmICHitCount:
.word gDvmICHitCount
diff --git a/vm/interp/InterpDefs.h b/vm/interp/InterpDefs.h
index 3c0d2e39e..2a20bfc2e 100644
--- a/vm/interp/InterpDefs.h
+++ b/vm/interp/InterpDefs.h
@@ -161,18 +161,13 @@ typedef struct InterpState {
*/
unsigned char** ppJitProfTable; // Used to refresh pJitProfTable
int icRechainCount; // Count down to next rechain request
- const void* jitCacheStart; // Code cache boundaries
- const void* jitCacheEnd;
-#endif
+ const void* pProfileCountdown; // Address of profile countdown timer
- bool debugIsMethodEntry; // used for method entry event triggers
-#if defined(WITH_TRACKREF_CHECKS)
- int debugTrackedRefStart; // tracked refs from prior invocations
-#endif
-
-#if defined(WITH_JIT)
struct JitToInterpEntries jitToInterpEntries;
+ const void* jitCacheStart; // Code cache boundaries
+ const void* jitCacheEnd;
+
int currTraceRun;
int totalTraceLen; // Number of Dalvik insts in trace
const u2* currTraceHead; // Start of the trace we're building
@@ -185,6 +180,12 @@ typedef struct InterpState {
double calleeSave[JIT_CALLEE_SAVE_DOUBLE_COUNT];
#endif
+ bool debugIsMethodEntry; // used for method entry event triggers
+#if defined(WITH_TRACKREF_CHECKS)
+ int debugTrackedRefStart; // tracked refs from prior invocations
+#endif
+
+
} InterpState;
/*
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index 8eb985639..5cfd28d3a 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -511,7 +511,7 @@ void dvmJitStats()
LOGD("JIT: %d Translation chains, %d interp stubs",
gDvmJit.translationChains, stubs);
- if (gDvmJit.profile) {
+ if (gDvmJit.profileMode == kTraceProfilingContinuous) {
dvmCompilerSortAndPrintTraceProfiles();
}
}
@@ -987,23 +987,30 @@ void* dvmJitGetCodeAddr(const u2* dPC)
const u2* npc = gDvmJit.pJitEntryTable[idx].dPC;
if (npc != NULL) {
bool hideTranslation = dvmJitHideTranslation();
-
if (npc == dPC) {
+ int offset = (gDvmJit.profileMode >= kTraceProfilingContinuous) ?
+ 0 : gDvmJit.pJitEntryTable[idx].u.info.profileOffset;
+ intptr_t codeAddress =
+ (intptr_t)gDvmJit.pJitEntryTable[idx].codeAddress;
#if defined(WITH_JIT_TUNING)
gDvmJit.addrLookupsFound++;
#endif
- return hideTranslation ?
- NULL : gDvmJit.pJitEntryTable[idx].codeAddress;
+ return hideTranslation ? NULL : (void *)(codeAddress + offset);
} else {
int chainEndMarker = gDvmJit.jitTableSize;
while (gDvmJit.pJitEntryTable[idx].u.info.chain != chainEndMarker) {
idx = gDvmJit.pJitEntryTable[idx].u.info.chain;
if (gDvmJit.pJitEntryTable[idx].dPC == dPC) {
+ int offset = (gDvmJit.profileMode >=
+ kTraceProfilingContinuous) ? 0 :
+ gDvmJit.pJitEntryTable[idx].u.info.profileOffset;
+ intptr_t codeAddress =
+ (intptr_t)gDvmJit.pJitEntryTable[idx].codeAddress;
#if defined(WITH_JIT_TUNING)
gDvmJit.addrLookupsFound++;
#endif
- return hideTranslation ?
- NULL : gDvmJit.pJitEntryTable[idx].codeAddress;
+ return hideTranslation ? NULL :
+ (void *)(codeAddress + offset);
}
}
}
@@ -1019,9 +1026,16 @@ void* dvmJitGetCodeAddr(const u2* dPC)
* NOTE: Once a codeAddress field transitions from initial state to
* JIT'd code, it must not be altered without first halting all
* threads. This routine should only be called by the compiler
- * thread.
+ * thread. We defer the setting of the profile prefix size until
+ * after the new code address is set to ensure that the prefix offset
+ * is never applied to the initial interpret-only translation. All
+ * translations with non-zero profile prefixes will still be correct
+ * if entered as if the profile offset is 0, but the interpret-only
+ * template cannot handle a non-zero prefix.
*/
-void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set) {
+void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set,
+ int profilePrefixSize)
+{
JitEntryInfoUnion oldValue;
JitEntryInfoUnion newValue;
JitEntry *jitEntry = lookupAndAdd(dPC, false);
@@ -1035,6 +1049,8 @@ void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set) {
oldValue.infoWord, newValue.infoWord,
&jitEntry->u.infoWord) != 0);
jitEntry->codeAddress = nPC;
+ newValue.info.profileOffset = profilePrefixSize;
+ jitEntry->u = newValue;
}
/*
@@ -1286,6 +1302,7 @@ bool dvmJitResizeJitTable( unsigned int size )
p->u.info.chain = chain;
}
}
+
dvmUnlockMutex(&gDvmJit.tableLock);
free(pOldTable);
@@ -1306,6 +1323,15 @@ void dvmJitResetTable(void)
unsigned int i;
dvmLockMutex(&gDvmJit.tableLock);
+
+ /* Note: if there is a need to preserve any existing counts, do so here. */
+ for (i=0; i < JIT_PROF_BLOCK_BUCKETS; i++) {
+ if (gDvmJit.pJitTraceProfCounters->buckets[i])
+ memset((void *) gDvmJit.pJitTraceProfCounters->buckets[i],
+ 0, sizeof(JitTraceCounter_t) * JIT_PROF_BLOCK_ENTRIES);
+ }
+ gDvmJit.pJitTraceProfCounters->next = 0;
+
memset((void *) jitEntry, 0, sizeof(JitEntry) * size);
for (i=0; i< size; i++) {
jitEntry[i].u.info.chain = size; /* Initialize chain termination */
@@ -1315,6 +1341,31 @@ void dvmJitResetTable(void)
}
/*
+ * Return the address of the next trace profile counter. This address
+ * will be embedded in the generated code for the trace, and thus cannot
+ * change while the trace exists.
+ */
+JitTraceCounter_t *dvmJitNextTraceCounter()
+{
+ int idx = gDvmJit.pJitTraceProfCounters->next / JIT_PROF_BLOCK_ENTRIES;
+ int elem = gDvmJit.pJitTraceProfCounters->next % JIT_PROF_BLOCK_ENTRIES;
+ JitTraceCounter_t *res;
+ /* Lazily allocate blocks of counters */
+ if (!gDvmJit.pJitTraceProfCounters->buckets[idx]) {
+ JitTraceCounter_t *p =
+ (JitTraceCounter_t*) calloc(JIT_PROF_BLOCK_ENTRIES, sizeof(*p));
+ if (!p) {
+ LOGE("Failed to allocate block of trace profile counters");
+ dvmAbort();
+ }
+ gDvmJit.pJitTraceProfCounters->buckets[idx] = p;
+ }
+ res = &gDvmJit.pJitTraceProfCounters->buckets[idx][elem];
+ gDvmJit.pJitTraceProfCounters->next++;
+ return res;
+}
+
+/*
* Float/double conversion requires clamping to min and max of integer form. If
* target doesn't support this normally, use these.
*/
@@ -1346,4 +1397,33 @@ s8 dvmJitf2l(float f)
return (s8)f;
}
+/* Should only be called by the compiler thread */
+void dvmJitChangeProfileMode(TraceProfilingModes newState)
+{
+ if (gDvmJit.profileMode != newState) {
+ gDvmJit.profileMode = newState;
+ dvmJitUnchainAll();
+ }
+}
+
+void dvmJitTraceProfilingOn()
+{
+ if (gDvmJit.profileMode == kTraceProfilingPeriodicOff)
+ dvmCompilerWorkEnqueue(NULL, kWorkOrderProfileMode,
+ (void*) kTraceProfilingPeriodicOn);
+ else if (gDvmJit.profileMode == kTraceProfilingDisabled)
+ dvmCompilerWorkEnqueue(NULL, kWorkOrderProfileMode,
+ (void*) kTraceProfilingContinuous);
+}
+
+void dvmJitTraceProfilingOff()
+{
+ if (gDvmJit.profileMode == kTraceProfilingPeriodicOn)
+ dvmCompilerWorkEnqueue(NULL, kWorkOrderProfileMode,
+ (void*) kTraceProfilingPeriodicOff);
+ else if (gDvmJit.profileMode == kTraceProfilingContinuous)
+ dvmCompilerWorkEnqueue(NULL, kWorkOrderProfileMode,
+ (void*) kTraceProfilingDisabled);
+}
+
#endif /* WITH_JIT */
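Nothing in this CL drives the periodic mode yet; the commit message only
promises that profiling will be switched on "periodically", and the
functions above are the hooks for doing so. One plausible policy,
sketched with an invented function name and window length, is to open a
profiling window and let it close itself, since
TEMPLATE_PERIODIC_PROFILING decrements gDvmJit.profileCountdown on each
profiled trace entry and calls dvmJitTraceProfilingOff() when it goes
negative:

    /* Hypothetical sampling driver built on this CL's hooks. */
    static void maybeOpenProfilingWindow(void)
    {
        if (gDvmJit.profileMode == kTraceProfilingPeriodicOff) {
            /* Window length in profiled trace entries (value invented). */
            gDvmJit.profileCountdown = 10000;
            /* Enqueues a kWorkOrderProfileMode request; the compiler
             * thread moves us to kTraceProfilingPeriodicOn. */
            dvmJitTraceProfilingOn();
        }
    }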
diff --git a/vm/interp/Jit.h b/vm/interp/Jit.h
index 6101f54ae..dda14d2bf 100644
--- a/vm/interp/Jit.h
+++ b/vm/interp/Jit.h
@@ -83,6 +83,29 @@ static inline u4 dvmJitHash( const u2* p ) {
}
/*
+ * The width of the chain field in JitEntryInfo sets the upper
+ * bound on the number of translations. Be careful if changing
+ * the size of JitEntry struct - the Dalvik PC to JitEntry
+ * hash functions have built-in knowledge of the size.
+ */
+#define JIT_ENTRY_CHAIN_WIDTH 2
+#define JIT_MAX_ENTRIES (1 << (JIT_ENTRY_CHAIN_WIDTH * 8))
+
+/*
+ * The trace profiling counters are allocated in blocks and individual
+ * counters must not move so long as any referencing trace exists.
+ */
+#define JIT_PROF_BLOCK_ENTRIES 1024
+#define JIT_PROF_BLOCK_BUCKETS (JIT_MAX_ENTRIES / JIT_PROF_BLOCK_ENTRIES)
+
+typedef s4 JitTraceCounter_t;
+
+typedef struct JitTraceProfCounters {
+ unsigned int next;
+ JitTraceCounter_t *buckets[JIT_PROF_BLOCK_BUCKETS];
+} JitTraceProfCounters;
+
+/*
* Entries in the JIT's address lookup hash table.
* Fields which may be updated by multiple threads packed into a
* single 32-bit word to allow use of atomic update.
@@ -94,7 +117,7 @@ typedef struct JitEntryInfo {
unsigned int inlineCandidate:1;
unsigned int profileEnabled:1;
JitInstructionSetType instructionSet:4;
- unsigned int unused:8;
+ unsigned int profileOffset:8;
u2 chain; /* Index of next in chain */
} JitEntryInfo;
@@ -120,7 +143,12 @@ void dvmJitResetTable(void);
struct JitEntry *dvmFindJitEntry(const u2* pc);
s8 dvmJitd2l(double d);
s8 dvmJitf2l(float f);
-void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set);
+void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set,
+ int profilePrefixSize);
void dvmJitAbortTraceSelect(InterpState* interpState);
+JitTraceCounter_t *dvmJitNextTraceCounter(void);
+void dvmJitTraceProfilingOff(void);
+void dvmJitTraceProfilingOn(void);
+void dvmJitChangeProfileMode(TraceProfilingModes newState);
#endif /*_DALVIK_INTERP_JIT*/
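The sizing arithmetic behind these constants, made explicit:

    JIT_MAX_ENTRIES        = 1 << (2 * 8)  = 65536 translations max
    JIT_PROF_BLOCK_BUCKETS = 65536 / 1024  = 64 counter blocks
    full-table footprint   = 65536 * sizeof(JitTraceCounter_t) = 256 KiB,
                             allocated lazily in 4 KiB blocks

Because blocks are never freed or moved once dvmJitNextTraceCounter()
hands out a pointer into one, the counter addresses embedded in compiled
traces stay valid even across dvmJitResetTable(), which zeroes the
counters but keeps the blocks.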
diff --git a/vm/mterp/Mterp.c b/vm/mterp/Mterp.c
index 0cd5a1fb2..68218a3da 100644
--- a/vm/mterp/Mterp.c
+++ b/vm/mterp/Mterp.c
@@ -83,6 +83,7 @@ bool dvmMterpStd(Thread* self, InterpState* glue)
glue->jitThreshold = gDvmJit.threshold;
glue->jitCacheStart = gDvmJit.codeCache;
glue->jitCacheEnd = (char*)gDvmJit.codeCache + gDvmJit.codeCacheSize;
+ glue->pProfileCountdown = &gDvmJit.profileCountdown;
#endif
#if defined(WITH_INLINE_PROFILING)
/*
diff --git a/vm/mterp/common/asm-constants.h b/vm/mterp/common/asm-constants.h
index e5a8a0429..8af792914 100644
--- a/vm/mterp/common/asm-constants.h
+++ b/vm/mterp/common/asm-constants.h
@@ -107,8 +107,14 @@ MTERP_OFFSET(offGlue_jitResumeDPC, MterpGlue, jitResumeDPC, 72)
MTERP_OFFSET(offGlue_jitThreshold, MterpGlue, jitThreshold, 76)
MTERP_OFFSET(offGlue_ppJitProfTable, MterpGlue, ppJitProfTable, 80)
MTERP_OFFSET(offGlue_icRechainCount, MterpGlue, icRechainCount, 84)
-MTERP_OFFSET(offGlue_jitCacheStart, MterpGlue, jitCacheStart, 88)
-MTERP_OFFSET(offGlue_jitCacheEnd, MterpGlue, jitCacheEnd, 92)
+MTERP_OFFSET(offGlue_pProfileCountdown, MterpGlue, pProfileCountdown, 88)
+#if defined(WITH_SELF_VERIFICATION)
+MTERP_OFFSET(offGlue_jitCacheStart, MterpGlue, jitCacheStart, 124)
+MTERP_OFFSET(offGlue_jitCacheEnd, MterpGlue, jitCacheEnd, 128)
+#else
+MTERP_OFFSET(offGlue_jitCacheStart, MterpGlue, jitCacheStart, 120)
+MTERP_OFFSET(offGlue_jitCacheEnd, MterpGlue, jitCacheEnd, 124)
+#endif
#endif
/* make sure all JValue union members are stored at the same offset */
MTERP_OFFSET(offGlue_retval_z, MterpGlue, retval.z, 8)