29 files changed, 534 insertions, 154 deletions
diff --git a/vm/Globals.h b/vm/Globals.h
index 25c485f57..e761081db 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -682,6 +682,14 @@ extern struct DvmGlobals gDvm;
 
 #if defined(WITH_JIT)
 
+/* Trace profiling modes.  Ordering matters - off states before on states */
+typedef enum TraceProfilingModes {
+    kTraceProfilingDisabled = 0,      // Not profiling
+    kTraceProfilingPeriodicOff = 1,   // Periodic profiling, off phase
+    kTraceProfilingContinuous = 2,    // Always profiling
+    kTraceProfilingPeriodicOn = 3     // Periodic profiling, on phase
+} TraceProfilingModes;
+
 /*
  * Exiting the compiled code w/o chaining will incur overhead to look up the
  * target in the code cache which is extra work only when JIT is enabled. So
@@ -720,9 +728,12 @@ struct DvmJitGlobals {
      * are stored in each thread. */
     struct JitEntry *pJitEntryTable;
 
-    /* Array of profile threshold counters */
+    /* Array of compilation trigger threshold counters */
     unsigned char *pProfTable;
 
+    /* Trace profiling counters */
+    struct JitTraceProfCounters *pJitTraceProfCounters;
+
     /* Copy of pProfTable used for temporarily disabling the Jit */
     unsigned char *pProfTableCopy;
 
@@ -801,8 +812,11 @@ struct DvmJitGlobals {
     /* Flag to dump all compiled code */
     bool printMe;
 
-    /* Flag to count trace execution */
-    bool profile;
+    /* Trace profiling mode */
+    TraceProfilingModes profileMode;
+
+    /* Periodic trace profiling countdown timer */
+    int profileCountdown;
 
     /* Vector to disable selected optimizations */
     int disableOpt;
diff --git a/vm/Init.c b/vm/Init.c
--- a/vm/Init.c
+++ b/vm/Init.c
@@ -949,7 +949,7 @@ static int dvmProcessOptions(int argc, const char* const argv[],
         } else if (strncmp(argv[i], "-Xjitverbose", 12) == 0) {
             gDvmJit.printMe = true;
         } else if (strncmp(argv[i], "-Xjitprofile", 12) == 0) {
-            gDvmJit.profile = true;
+            gDvmJit.profileMode = kTraceProfilingContinuous;
         } else if (strncmp(argv[i], "-Xjitdisableopt", 15) == 0) {
             /* Disable selected optimizations */
             if (argv[i][15] == ':') {
diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c
index c8ff62ee3..adb58dd4b 100644
--- a/vm/compiler/Compiler.c
+++ b/vm/compiler/Compiler.c
@@ -332,6 +332,7 @@ static bool compilerThreadStartup(void)
 {
     JitEntry *pJitTable = NULL;
     unsigned char *pJitProfTable = NULL;
+    JitTraceProfCounters *pJitTraceProfCounters = NULL;
     unsigned int i;
 
     if (!dvmCompilerArchInit())
@@ -398,6 +399,15 @@ static bool compilerThreadStartup(void)
     /* Is chain field wide enough for termination pattern? */
     assert(pJitTable[0].u.info.chain == gDvmJit.jitTableSize);
 
+    /* Allocate the trace profiling structure */
+    pJitTraceProfCounters = (JitTraceProfCounters*)
+         calloc(1, sizeof(*pJitTraceProfCounters));
+    if (!pJitTraceProfCounters) {
+        LOGE("jit trace prof counters allocation failed\n");
+        dvmUnlockMutex(&gDvmJit.tableLock);
+        goto fail;
+    }
+
     gDvmJit.pJitEntryTable = pJitTable;
     gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
     gDvmJit.jitTableEntriesUsed = 0;
@@ -409,6 +419,7 @@ static bool compilerThreadStartup(void)
      */
     gDvmJit.pProfTable = dvmDebuggerOrProfilerActive() ?
                          NULL : pJitProfTable;
     gDvmJit.pProfTableCopy = pJitProfTable;
+    gDvmJit.pJitTraceProfCounters = pJitTraceProfCounters;
     dvmUnlockMutex(&gDvmJit.tableLock);
 
     /* Signal running threads to refresh their cached pJitTable pointers */
@@ -620,27 +631,19 @@ static void *compilerThreadStart(void *arg)
             if (gDvmJit.haltCompilerThread) {
                 LOGD("Compiler shutdown in progress - discarding request");
             } else if (!gDvmJit.codeCacheFull) {
-                bool compileOK = false;
                 jmp_buf jmpBuf;
                 work.bailPtr = &jmpBuf;
                 bool aborted = setjmp(jmpBuf);
                 if (!aborted) {
-                    compileOK = dvmCompilerDoWork(&work);
-                }
-                if (aborted || !compileOK) {
-#if 0 // for x86 JIT testing
-                    dvmJitSetCodeAddr(work.pc,
-                                      dvmCompilerGetInterpretTemplate(),
-                                      work.result.instructionSet);
-#endif
-                    dvmCompilerArenaReset();
-                } else if (!work.result.discardResult &&
-                           work.result.codeAddress) {
-                    /* Make sure that proper code addr is installed */
-                    assert(work.result.codeAddress != NULL);
-                    dvmJitSetCodeAddr(work.pc, work.result.codeAddress,
-                                      work.result.instructionSet);
+                    bool codeCompiled = dvmCompilerDoWork(&work);
+                    if (codeCompiled && !work.result.discardResult &&
+                            work.result.codeAddress) {
+                        dvmJitSetCodeAddr(work.pc, work.result.codeAddress,
+                                          work.result.instructionSet,
+                                          work.result.profileCodeSize);
+                    }
                 }
+                dvmCompilerArenaReset();
             }
             free(work.info);
 #if defined(WITH_JIT_TUNING)
@@ -697,7 +700,8 @@ void dvmCompilerShutdown(void)
     gDvmJit.pProfTable = NULL;
     gDvmJit.pProfTableCopy = NULL;
 
-    if (gDvm.verboseShutdown) {
+    if (gDvm.verboseShutdown ||
+            gDvmJit.profileMode == kTraceProfilingContinuous) {
         dvmCompilerDumpStats();
         while (gDvmJit.compilerQueueLength)
             sleep(5);
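
[Reviewer note, not part of the change] The "off states before on states" ordering
in the new enum is load-bearing: code elsewhere in this patch (e.g. dvmJitGetCodeAddr
in vm/interp/Jit.c) tests profileMode >= kTraceProfilingContinuous to mean "counters
are live". A minimal sketch of that invariant, assuming only the enum added above:

    static inline bool traceCountersActive(TraceProfilingModes mode)
    {
        /* Continuous (2) and PeriodicOn (3) are the two counting states */
        return mode >= kTraceProfilingContinuous;
    }
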
diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h
index 0a43df3c9..cd9d21bb3 100644
--- a/vm/compiler/Compiler.h
+++ b/vm/compiler/Compiler.h
@@ -45,15 +45,8 @@
 #define COMPILER_TRACE_CHAINING(X)
 
 /* Macro to change the permissions applied to a chunk of the code cache */
-#if !defined(WITH_JIT_TUNING)
 #define PROTECT_CODE_CACHE_ATTRS       (PROT_READ | PROT_EXEC)
 #define UNPROTECT_CODE_CACHE_ATTRS     (PROT_READ | PROT_EXEC | PROT_WRITE)
-#else
-/* When doing JIT profiling always grant the write permission */
-#define PROTECT_CODE_CACHE_ATTRS       (PROT_READ | PROT_EXEC |              \
-                                        (gDvmJit.profile ? PROT_WRITE : 0))
-#define UNPROTECT_CODE_CACHE_ATTRS     (PROT_READ | PROT_EXEC | PROT_WRITE)
-#endif
 
 /* Acquire the lock before removing PROT_WRITE from the specified mem region */
 #define UNPROTECT_CODE_CACHE(addr, size)                                      \
@@ -90,6 +83,7 @@ typedef enum JitInstructionSetType {
 typedef struct JitTranslationInfo {
     void *codeAddress;
     JitInstructionSetType instructionSet;
+    int profileCodeSize;
     bool discardResult;         // Used for debugging divergence and IC patching
     bool methodCompilationAborted;  // Cannot compile the whole method
     Thread *requestingThread;   // For debugging purpose
@@ -100,6 +94,7 @@ typedef enum WorkOrderKind {
     kWorkOrderMethod = 1,       // Work is to compile a whole method
     kWorkOrderTrace = 2,        // Work is to compile code fragment(s)
     kWorkOrderTraceDebug = 3,   // Work is to compile/debug code fragment(s)
+    kWorkOrderProfileMode = 4,  // Change profiling mode
 } WorkOrderKind;
 
 typedef struct CompilerWorkOrder {
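
[Reviewer note, not part of the change] A sketch of how the new profileCodeSize
field travels through the patch, using only names introduced here:

    /* cUnit->profileCodeSize             set by genTraceProfileEntry()      */
    /* -> info->profileCodeSize           copied in dvmCompilerAssembleLIR() */
    /* -> dvmJitSetCodeAddr(..., size)    passed by the compiler thread      */
    /* -> jitEntry->u.info.profileOffset  consumed by dvmJitGetCodeAddr()    */
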
diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h
index caf6fa617..54d41a5c5 100644
--- a/vm/compiler/CompilerIR.h
+++ b/vm/compiler/CompilerIR.h
@@ -206,11 +206,11 @@ typedef struct CompilationUnit {
     void *baseAddr;
     bool printMe;
     bool allSingleStep;
-    bool executionCount;                // Add code to count trace executions
     bool hasLoop;                       // Contains a loop
     bool hasInvoke;                     // Contains an invoke instruction
     bool heapMemOp;                     // Mark mem ops for self verification
     bool wholeMethod;
+    int profileCodeSize;                // Size of the profile prefix in bytes
     int numChainingCells[kChainingCellGap];
     LIR *firstChainingLIR[kChainingCellGap];
     LIR *chainingCellBottom;
diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c
index 95ef026c0..915e5f3fe 100644
--- a/vm/compiler/Frontend.c
+++ b/vm/compiler/Frontend.c
@@ -458,9 +458,6 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
     /* Initialize the printMe flag */
     cUnit.printMe = gDvmJit.printMe;
 
-    /* Initialize the profile flag */
-    cUnit.executionCount = gDvmJit.profile;
-
     /* Setup the method */
     cUnit.method = desc->method;
 
@@ -634,6 +631,7 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
     for (blockId = 0; blockId < blockList->numUsed; blockId++) {
         curBB = (BasicBlock *) dvmGrowableListGetElement(blockList, blockId);
         MIR *lastInsn = curBB->lastMIRInsn;
+        BasicBlock *backwardCell;
         /* Skip empty blocks */
         if (lastInsn == NULL) {
             continue;
@@ -708,25 +706,11 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts,
         exitBB->needFallThroughBranch = true;
 
         loopBranch->taken = exitBB;
-#if defined(WITH_SELF_VERIFICATION)
-        BasicBlock *backwardCell =
+        backwardCell =
             dvmCompilerNewBB(kChainingCellBackwardBranch, numBlocks++);
         dvmInsertGrowableList(blockList, (intptr_t) backwardCell);
         backwardCell->startOffset = entryCodeBB->startOffset;
         loopBranch->fallThrough = backwardCell;
-#elif defined(WITH_JIT_TUNING)
-        if (gDvmJit.profile) {
-            BasicBlock *backwardCell =
-                dvmCompilerNewBB(kChainingCellBackwardBranch, numBlocks++);
-            dvmInsertGrowableList(blockList, (intptr_t) backwardCell);
-            backwardCell->startOffset = entryCodeBB->startOffset;
-            loopBranch->fallThrough = backwardCell;
-        } else {
-            loopBranch->fallThrough = entryCodeBB;
-        }
-#else
-        loopBranch->fallThrough = entryCodeBB;
-#endif
 
         /* Create the chaining cell as the fallthrough of the exit block */
         exitChainingCell = dvmCompilerNewBB(kChainingCellNormal,
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index 4f3434dec..437c2ed96 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -619,6 +619,8 @@ typedef enum ArmOpcode {
     kThumb2Bfc,        /* bfc [11110011011011110] [0] imm3[14-12]
                           rd[11-8] imm2[7-6] [0] msb[4-0] */
     kThumb2Dmb,        /* dmb [1111001110111111100011110101] option[3-0] */
+    kThumb2LdrPcReln12, /* ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12]
+                           imm12[11-0] */
     kArmLast,
 } ArmOpcode;
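
[Reviewer note, not part of the change] A quick sanity check of the new opcode,
derived directly from its ENCODING_MAP entry in Assemble.c below (skeleton
0xf85f0000, rt in bits 15-12, imm12 in bits 11-0); a sketch, not code in the tree:

    #include <stdint.h>

    /* "ldr r0, [pc, #-8]" -> 0xf85f0008 */
    static uint32_t encodeLdrPcReln12(unsigned rt, unsigned imm12)
    {
        return 0xf85f0000u | (rt << 12) | (imm12 & 0xfffu);
    }
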
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 4154387ba..b5c04f5d0 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -876,6 +876,11 @@ ArmEncodingMap EncodingMap[kArmLast] = {
                  kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP,
                  "dmb","#!0B",2),
+    ENCODING_MAP(kThumb2LdrPcReln12, 0xf85f0000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
+                 "ldr", "r!0d, [rpc, -#!1d]", 2),
 };
 
 /*
@@ -1163,21 +1168,21 @@ static void matchSignatureBreakpoint(const CompilationUnit *cUnit,
 /*
  * Translation layout in the code cache.  Note that the codeAddress pointer
  * in JitTable will point directly to the code body (field codeAddress).  The
- * chain cell offset codeAddress - 2, and (if present) executionCount is at
- * codeAddress - 6.
+ * chain cell offset is at codeAddress - 2, and the address of the trace
+ * profile counter is at codeAddress - 6.
 *
 *      +----------------------------+
-*      | Execution count            |  -> [Optional] 4 bytes
+*      | Trace Profile Counter addr |  -> 4 bytes
 *      +----------------------------+
 *   +--| Offset to chain cell counts|  -> 2 bytes
 *   |  +----------------------------+
-*   |  | Code body                  |  -> Start address for translation
-*   |  |                            |     variable in 2-byte chunks
-*   |  .                            .     (JitTable's codeAddress points here)
+*   |  | Trace profile code         |  <- entry point when profiling
+*   |  .  -   -   -   -   -   -   - .
+*   |  | Code body                  |  <- entry point when not profiling
 *   |  .                            .
 *   |  |                            |
 *   |  +----------------------------+
-*   |  | Chaining Cells             |  -> 12/16 bytes each, must be 4 byte aligned
+*   |  | Chaining Cells             |  -> 12/16 bytes, 4 byte aligned
 *   |  .                            .
 *   |  .                            .
 *   |  |                            |
@@ -1251,13 +1256,10 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info)
            chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG);
 
     /*
-     * Replace the CHAIN_CELL_OFFSET_TAG with the real value. If trace
-     * profiling is enabled, subtract 4 (occupied by the counter word) from
-     * the absolute offset as the value stored in chainCellOffsetLIR is the
-     * delta from &chainCellOffsetLIR to &ChainCellCounts.
+     * Adjust the CHAIN_CELL_OFFSET_TAG LIR's offset to remove the
+     * space occupied by the pointer to the trace profiling counter.
      */
-    chainCellOffsetLIR->operands[0] =
-        gDvmJit.profile ? (chainCellOffset - 4) : chainCellOffset;
+    chainCellOffsetLIR->operands[0] = chainCellOffset - 4;
 
     offset += sizeof(chainCellCounts) + descSize;
 
@@ -1363,6 +1365,8 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info)
     /* If applicable, mark low bit to denote thumb */
     if (info->instructionSet != DALVIK_JIT_ARM)
         info->codeAddress = (char*)info->codeAddress + 1;
+    /* transfer the size of the profiling code */
+    info->profileCodeSize = cUnit->profileCodeSize;
 }
 
 /*
@@ -1836,14 +1840,37 @@ static char *getTraceBase(const JitEntry *p)
            (6 + (p->u.info.instructionSet == DALVIK_JIT_ARM ? 0 : 1));
 }
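
[Reviewer note, not part of the change] The 6-byte header that now precedes every
translation, written out as a struct for clarity. This is a sketch only -- the real
code uses raw pointer arithmetic as in getTraceBase() above; u2 and
JitTraceCounter_t are types from this tree:

    typedef struct __attribute__((packed)) TraceHeader {
        JitTraceCounter_t *profCountAddr; /* codeAddress - 6: &counter       */
        u2 chainCellOffset;               /* codeAddress - 2: delta to counts */
    } TraceHeader;
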
+/* Handy function to retrieve the profile count */
+static inline JitTraceCounter_t getProfileCount(const JitEntry *entry)
+{
+    if (entry->dPC == 0 || entry->codeAddress == 0 ||
+        entry->codeAddress == dvmCompilerGetInterpretTemplate())
+        return 0;
+
+    JitTraceCounter_t **p = (JitTraceCounter_t **) getTraceBase(entry);
+
+    return **p;
+}
+
+/* Handy function to reset the profile count */
+static inline void resetProfileCount(const JitEntry *entry)
+{
+    if (entry->dPC == 0 || entry->codeAddress == 0 ||
+        entry->codeAddress == dvmCompilerGetInterpretTemplate())
+        return;
+
+    JitTraceCounter_t **p = (JitTraceCounter_t **) getTraceBase(entry);
+
+    **p = 0;
+}
+
 /* Dumps profile info for a single trace */
 static int dumpTraceProfile(JitEntry *p, bool silent, bool reset,
                             unsigned long sum)
 {
     ChainCellCounts* pCellCounts;
     char* traceBase;
-    u4* pExecutionCount;
-    u4 executionCount;
+    JitTraceCounter_t count;
     u2* pCellOffset;
     JitTraceDescription *desc;
     const Method* method;
@@ -1861,14 +1888,12 @@ static int dumpTraceProfile(JitEntry *p, bool silent, bool reset,
         LOGD("TRACEPROFILE 0x%08x 0 INTERPRET_ONLY 0 0", (int)traceBase);
         return 0;
     }
-
-    pExecutionCount = (u4*) (traceBase);
-    executionCount = *pExecutionCount;
+    count = getProfileCount(p);
     if (reset) {
-        *pExecutionCount =0;
+        resetProfileCount(p);
     }
     if (silent) {
-        return executionCount;
+        return count;
     }
     pCellOffset = (u2*) (traceBase + 4);
     pCellCounts = (ChainCellCounts*) ((char *)pCellOffset + *pCellOffset);
@@ -1893,8 +1918,8 @@ static int dumpTraceProfile(JitEntry *p, bool silent, bool reset,
 
     LOGD("TRACEPROFILE 0x%08x % 10d %5.2f%% [%#x(+%d), %d] %s%s;%s",
          (int)traceBase,
-         executionCount,
-         ((float ) executionCount) / sum * 100.0,
+         count,
+         ((float ) count) / sum * 100.0,
          desc->trace[0].frag.startOffset,
          desc->trace[0].frag.numInsts,
          addrToLine.lineNum,
@@ -1919,7 +1944,7 @@ static int dumpTraceProfile(JitEntry *p, bool silent, bool reset,
              methodDesc);
     }
 
-    return executionCount;
+    return count;
 }
 
 /* Create a copy of the trace descriptor of an existing compilation */
@@ -1948,27 +1973,14 @@ JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
     return newCopy;
 }
 
-/* Handy function to retrieve the profile count */
-static inline int getProfileCount(const JitEntry *entry)
-{
-    if (entry->dPC == 0 || entry->codeAddress == 0 ||
-        entry->codeAddress == dvmCompilerGetInterpretTemplate())
-        return 0;
-
-    u4 *pExecutionCount = (u4 *) getTraceBase(entry);
-
-    return *pExecutionCount;
-}
-
-
 /* qsort callback function */
 static int sortTraceProfileCount(const void *entry1, const void *entry2)
 {
     const JitEntry *jitEntry1 = (const JitEntry *)entry1;
     const JitEntry *jitEntry2 = (const JitEntry *)entry2;
 
-    int count1 = getProfileCount(jitEntry1);
-    int count2 = getProfileCount(jitEntry2);
+    JitTraceCounter_t count1 = getProfileCount(jitEntry1);
+    JitTraceCounter_t count2 = getProfileCount(jitEntry2);
     return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
 }
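
[Reviewer note, not part of the change] For context, the comparator above is
consumed roughly like this in dvmCompilerSortAndPrintTraceProfiles() (a sketch;
the buffer name is assumed):

    /* Sort a snapshot of the JIT entries by descending execution count */
    qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
          sortTraceProfileCount);
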
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 6473edb50..181a128fd 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -3536,7 +3536,6 @@ static void handleHotChainingCell(CompilationUnit *cUnit,
     addWordData(cUnit, (int) (cUnit->method->insns + offset), true);
 }
 
-#if defined(WITH_SELF_VERIFICATION) || defined(WITH_JIT_TUNING)
 /* Chaining cell for branches that branch back into the same basic block */
 static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
                                              unsigned int offset)
@@ -3558,7 +3557,6 @@ static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
     addWordData(cUnit, (int) (cUnit->method->insns + offset), true);
 }
-#endif
 
 /* Chaining cell for monomorphic method invocations. */
 static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                               const Method *callee)
@@ -3944,39 +3942,8 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
     GrowableListIterator iterator;
 
     dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
 
-    if (cUnit->executionCount) {
-        /*
-         * Reserve 6 bytes at the beginning of the trace
-         *        +----------------------------+
-         *        | execution count (4 bytes)  |
-         *        +----------------------------+
-         *        | chain cell offset (2 bytes)|
-         *        +----------------------------+
-         * ...and then code to increment the execution
-         * count:
-         *       mov   r0, pc       @ move adr of "mov r0,pc" + 4 to r0
-         *       sub   r0, #10      @ back up to addr of executionCount
-         *       ldr   r1, [r0]
-         *       add   r1, #1
-         *       str   r1, [r0]
-         */
-        newLIR1(cUnit, kArm16BitData, 0);
-        newLIR1(cUnit, kArm16BitData, 0);
-        cUnit->chainCellOffsetLIR =
-            (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
-        cUnit->headerSize = 6;
-        /* Thumb instruction used directly here to ensure correct size */
-        newLIR2(cUnit, kThumbMovRR_H2L, r0, rpc);
-        newLIR2(cUnit, kThumbSubRI8, r0, 10);
-        newLIR3(cUnit, kThumbLdrRRI5, r1, r0, 0);
-        newLIR2(cUnit, kThumbAddRI8, r1, 1);
-        newLIR3(cUnit, kThumbStrRRI5, r1, r0, 0);
-    } else {
-        /* Just reserve 2 bytes for the chain cell offset */
-        cUnit->chainCellOffsetLIR =
-            (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
-        cUnit->headerSize = 2;
-    }
+    /* Traces start with a profiling entry point.  Generate it here */
+    cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
 
     /* Handle the content in each basic block */
     for (i = 0; ; i++) {
@@ -4062,7 +4029,6 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
                     opReg(cUnit, kOpBlx, r1);
                 }
                 break;
-#if defined(WITH_SELF_VERIFICATION) || defined(WITH_JIT_TUNING)
             case kChainingCellBackwardBranch:
                 labelList[i].opcode =
                     kArmPseudoChainingCellBackwardBranch;
@@ -4071,7 +4037,6 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit)
                     &chainingListByType[kChainingCellBackwardBranch],
                     i);
                 break;
-#endif
             default:
                 break;
         }
@@ -4303,12 +4268,10 @@ gen_fallthrough:
             case kChainingCellHot:
                 handleHotChainingCell(cUnit, chainingBlock->startOffset);
                 break;
-#if defined(WITH_SELF_VERIFICATION) || defined(WITH_JIT_TUNING)
             case kChainingCellBackwardBranch:
                 handleBackwardBranchChainingCell(cUnit,
                     chainingBlock->startOffset);
                 break;
-#endif
             default:
                 LOGE("Bad blocktype %d", chainingBlock->blockType);
                 dvmCompilerAbort(cUnit);
@@ -4342,11 +4305,15 @@ gen_fallthrough:
 #endif
 }
 
-/* Accept the work and start compiling */
+/*
+ * Accept the work and start compiling.  Returns true if compilation
+ * is attempted.
+ */
 bool dvmCompilerDoWork(CompilerWorkOrder *work)
 {
     JitTraceDescription *desc;
-    bool res;
+    bool isCompile;
+    bool success = true;
 
     if (gDvmJit.codeCacheFull) {
         return false;
@@ -4354,27 +4321,35 @@ bool dvmCompilerDoWork(CompilerWorkOrder *work)
 
     switch (work->kind) {
         case kWorkOrderTrace:
+            isCompile = true;
             /* Start compilation with maximally allowed trace length */
            desc = (JitTraceDescription *)work->info;
-            res = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
-                                  work->bailPtr, 0 /* no hints */);
+            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
+                                      work->bailPtr, 0 /* no hints */);
             break;
         case kWorkOrderTraceDebug: {
             bool oldPrintMe = gDvmJit.printMe;
             gDvmJit.printMe = true;
+            isCompile = true;
             /* Start compilation with maximally allowed trace length */
             desc = (JitTraceDescription *)work->info;
-            res = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
-                                  work->bailPtr, 0 /* no hints */);
+            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
+                                      work->bailPtr, 0 /* no hints */);
             gDvmJit.printMe = oldPrintMe;
             break;
         }
+        case kWorkOrderProfileMode:
+            dvmJitChangeProfileMode((TraceProfilingModes)work->info);
+            isCompile = false;
+            break;
         default:
-            res = false;
+            isCompile = false;
            LOGE("Jit: unknown work order type");
             assert(0);  // Bail if debug build, discard otherwise
     }
-    return res;
+    if (!success)
+        work->result.codeAddress = NULL;
+    return isCompile;
 }
 
 /* Architectural-specific debugging helpers go here */
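
[Reviewer note, not part of the change] The new kWorkOrderProfileMode case is
driven from vm/interp/Jit.c later in this patch; the request pattern looks like
this (the mode is passed as the work-order payload, not a heap pointer):

    /* Ask the compiler thread to flip trace profiling to continuous mode */
    dvmCompilerWorkEnqueue(NULL, kWorkOrderProfileMode,
                           (void *) kTraceProfilingContinuous);
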
diff --git a/vm/compiler/codegen/arm/Thumb/Gen.c b/vm/compiler/codegen/arm/Thumb/Gen.c
index 07f3f092f..b80696595 100644
--- a/vm/compiler/codegen/arm/Thumb/Gen.c
+++ b/vm/compiler/codegen/arm/Thumb/Gen.c
@@ -23,6 +23,62 @@
  */
 
 /*
+ * Reserve 6 bytes at the beginning of the trace
+ *        +----------------------------+
+ *        | prof count addr (4 bytes)  |
+ *        +----------------------------+
+ *        | chain cell offset (2 bytes)|
+ *        +----------------------------+
+ *
+ * ...and then code to increment the execution count.
+ *
+ * For continuous profiling (12 bytes):
+ *
+ *       mov   r0, pc       @ move adr of "mov r0,pc" + 4 to r0
+ *       sub   r0, #10      @ back up to addr of prof count pointer
+ *       ldr   r0, [r0]     @ get address of counter
+ *       ldr   r1, [r0]
+ *       add   r1, #1
+ *       str   r1, [r0]
+ *
+ * For periodic profiling (4 bytes):
+ *       call  TEMPLATE_PERIODIC_PROFILING
+ *
+ * and return the size (in bytes) of the generated code.
+ */

+static int genTraceProfileEntry(CompilationUnit *cUnit)
+{
+    intptr_t addr = (intptr_t)dvmJitNextTraceCounter();
+    assert(__BYTE_ORDER == __LITTLE_ENDIAN);
+    newLIR1(cUnit, kArm16BitData, addr & 0xffff);
+    newLIR1(cUnit, kArm16BitData, (addr >> 16) & 0xffff);
+    cUnit->chainCellOffsetLIR =
+        (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
+    cUnit->headerSize = 6;
+    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
+        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
+        /* Thumb instruction used directly here to ensure correct size */
+        newLIR2(cUnit, kThumbMovRR_H2L, r0, rpc);
+        newLIR2(cUnit, kThumbSubRI8, r0, 10);
+        newLIR3(cUnit, kThumbLdrRRI5, r0, r0, 0);
+        newLIR3(cUnit, kThumbLdrRRI5, r1, r0, 0);
+        newLIR2(cUnit, kThumbAddRI8, r1, 1);
+        newLIR3(cUnit, kThumbStrRRI5, r1, r0, 0);
+        return 12;
+    } else {
+        int opcode = TEMPLATE_PERIODIC_PROFILING;
+        newLIR2(cUnit, kThumbBlx1,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
+        newLIR2(cUnit, kThumbBlx2,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
+        return 4;
+    }
+}
+
+/*
  * Perform a "reg cmp imm" operation and jump to the PCR region if condition
  * satisfies.
  */
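
[Reviewer note, not part of the change] What the continuous-profiling prefix
computes, expressed in C (a sketch; the pointer argument is the counter-address
word planted at codeAddress - 6 by the kArm16BitData emissions above):

    static inline void traceEntryContinuous(JitTraceCounter_t **profCountAddr)
    {
        JitTraceCounter_t *counter = *profCountAddr; /* ldr r0, [r0]       */
        (*counter)++;                                /* ldr/add/str via r1 */
    }
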
diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.c b/vm/compiler/codegen/arm/Thumb2/Gen.c
index 0891524f7..f5e1096ea 100644
--- a/vm/compiler/codegen/arm/Thumb2/Gen.c
+++ b/vm/compiler/codegen/arm/Thumb2/Gen.c
@@ -15,13 +15,64 @@
  */
 
 /*
- * This file contains codegen for the Thumb ISA and is intended to be
+ * This file contains codegen for the Thumb2 ISA and is intended to be
  * includes by:
 *
 *        Codegen-$(TARGET_ARCH_VARIANT).c
 *
 */
 
+/*
+ * Reserve 6 bytes at the beginning of the trace
+ *        +----------------------------+
+ *        | prof count addr (4 bytes)  |
+ *        +----------------------------+
+ *        | chain cell offset (2 bytes)|
+ *        +----------------------------+
+ *
+ * ...and then code to increment the execution count.
+ *
+ * For continuous profiling (10 bytes):
+ *       ldr   r0, [pc-8]   @ get prof count addr    [4 bytes]
+ *       ldr   r1, [r0]     @ load counter           [2 bytes]
+ *       add   r1, #1       @ increment              [2 bytes]
+ *       str   r1, [r0]     @ store                  [2 bytes]
+ *
+ * For periodic profiling (4 bytes):
+ *       call  TEMPLATE_PERIODIC_PROFILING
+ *
+ * and return the size (in bytes) of the generated code.
+ */

+static int genTraceProfileEntry(CompilationUnit *cUnit)
+{
+    intptr_t addr = (intptr_t)dvmJitNextTraceCounter();
+    assert(__BYTE_ORDER == __LITTLE_ENDIAN);
+    newLIR1(cUnit, kArm16BitData, addr & 0xffff);
+    newLIR1(cUnit, kArm16BitData, (addr >> 16) & 0xffff);
+    cUnit->chainCellOffsetLIR =
+        (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
+    cUnit->headerSize = 6;
+    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
+        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
+        /* Thumb[2] instruction used directly here to ensure correct size */
+        newLIR2(cUnit, kThumb2LdrPcReln12, r0, 8);
+        newLIR3(cUnit, kThumbLdrRRI5, r1, r0, 0);
+        newLIR2(cUnit, kThumbAddRI8, r1, 1);
+        newLIR3(cUnit, kThumbStrRRI5, r1, r0, 0);
+        return 10;
+    } else {
+        int opcode = TEMPLATE_PERIODIC_PROFILING;
+        newLIR2(cUnit, kThumbBlx1,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
+        newLIR2(cUnit, kThumbBlx2,
+            (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
+            (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
+        return 4;
+    }
+}
+
 static void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest,
                         RegLocation rlSrc)
 {
diff --git a/vm/compiler/template/armv5te-vfp/TemplateOpList.h b/vm/compiler/template/armv5te-vfp/TemplateOpList.h
index d991bedb0..97addfaf8 100644
--- a/vm/compiler/template/armv5te-vfp/TemplateOpList.h
+++ b/vm/compiler/template/armv5te-vfp/TemplateOpList.h
@@ -57,3 +57,4 @@ JIT_TEMPLATE(STRING_INDEXOF)
 JIT_TEMPLATE(INTERPRET)
 JIT_TEMPLATE(MONITOR_ENTER)
 JIT_TEMPLATE(MONITOR_ENTER_DEBUG)
+JIT_TEMPLATE(PERIODIC_PROFILING)
diff --git a/vm/compiler/template/armv5te/TEMPLATE_PERIODIC_PROFILING.S b/vm/compiler/template/armv5te/TEMPLATE_PERIODIC_PROFILING.S
new file mode 100644
index 000000000..7f7109646
--- /dev/null
+++ b/vm/compiler/template/armv5te/TEMPLATE_PERIODIC_PROFILING.S
@@ -0,0 +1,26 @@
+    /*
+     * Increment profile counter for this trace, and decrement
+     * the sample countdown.  If the countdown goes below zero, turn
+     * off profiling.
+     *
+     * On entry
+     * (lr-11) is address of pointer to counter.  Note: the counter
+     * actually exists 10 bytes before the return target, but because
+     * we are arriving from thumb mode, lr will have its low bit set.
+     */
+     ldr    r0, [lr,#-11]
+     ldr    r1, [rGLUE, #offGlue_pProfileCountdown]
+     ldr    r2, [r0]                    @ get counter
+     ldr    r3, [r1]                    @ get countdown timer
+     add    r2, #1
+     subs   r3, #1
+     blt    .L${opcode}_disable_profiling
+     str    r2, [r0]
+     str    r3, [r1]
+     bx     lr
+
+.L${opcode}_disable_profiling:
+     mov    r4, lr                      @ preserve lr
+     ldr    r0, .LdvmJitTraceProfilingOff
+     blx    r0
+     bx     r4
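
[Reviewer note, not part of the change] The template above, rendered as C for
review (a sketch -- the real handoff to dvmJitTraceProfilingOff goes through the
.LdvmJitTraceProfilingOff literal added to footer.S below). As in the assembly,
neither value is stored back when the countdown expires:

    static void periodicProfileHit(JitTraceCounter_t *counter, int *countdown)
    {
        JitTraceCounter_t newCount = *counter + 1;   /* add  r2, #1  */
        int newCountdown = *countdown - 1;           /* subs r3, #1  */
        if (newCountdown < 0) {                      /* blt ..._disable */
            dvmJitTraceProfilingOff();               /* leave counts as-is */
            return;
        }
        *counter = newCount;                         /* str r2, [r0] */
        *countdown = newCountdown;                   /* str r3, [r1] */
    }
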
diff --git a/vm/compiler/template/armv5te/TemplateOpList.h b/vm/compiler/template/armv5te/TemplateOpList.h
index e81383c5b..663e0df68 100644
--- a/vm/compiler/template/armv5te/TemplateOpList.h
+++ b/vm/compiler/template/armv5te/TemplateOpList.h
@@ -42,3 +42,4 @@ JIT_TEMPLATE(STRING_INDEXOF)
 JIT_TEMPLATE(INTERPRET)
 JIT_TEMPLATE(MONITOR_ENTER)
 JIT_TEMPLATE(MONITOR_ENTER_DEBUG)
+JIT_TEMPLATE(PERIODIC_PROFILING)
diff --git a/vm/compiler/template/armv5te/footer.S b/vm/compiler/template/armv5te/footer.S
index ba0335b30..7b35e8a1c 100644
--- a/vm/compiler/template/armv5te/footer.S
+++ b/vm/compiler/template/armv5te/footer.S
@@ -104,6 +104,8 @@
     .word dvmMterpCommonExceptionThrown
 .LdvmLockObject:
     .word dvmLockObject
+.LdvmJitTraceProfilingOff:
+    .word dvmJitTraceProfilingOff
 #if defined(WITH_JIT_TUNING)
 .LdvmICHitCount:
     .word gDvmICHitCount
diff --git a/vm/compiler/template/armv7-a-neon/TemplateOpList.h b/vm/compiler/template/armv7-a-neon/TemplateOpList.h
index d991bedb0..97addfaf8 100644
--- a/vm/compiler/template/armv7-a-neon/TemplateOpList.h
+++ b/vm/compiler/template/armv7-a-neon/TemplateOpList.h
@@ -57,3 +57,4 @@ JIT_TEMPLATE(STRING_INDEXOF)
 JIT_TEMPLATE(INTERPRET)
 JIT_TEMPLATE(MONITOR_ENTER)
 JIT_TEMPLATE(MONITOR_ENTER_DEBUG)
+JIT_TEMPLATE(PERIODIC_PROFILING)
diff --git a/vm/compiler/template/armv7-a/TemplateOpList.h b/vm/compiler/template/armv7-a/TemplateOpList.h
index d991bedb0..97addfaf8 100644
--- a/vm/compiler/template/armv7-a/TemplateOpList.h
+++ b/vm/compiler/template/armv7-a/TemplateOpList.h
@@ -57,3 +57,4 @@ JIT_TEMPLATE(STRING_INDEXOF)
 JIT_TEMPLATE(INTERPRET)
 JIT_TEMPLATE(MONITOR_ENTER)
 JIT_TEMPLATE(MONITOR_ENTER_DEBUG)
+JIT_TEMPLATE(PERIODIC_PROFILING)
diff --git a/vm/compiler/template/config-armv5te-vfp b/vm/compiler/template/config-armv5te-vfp
index 1b02261eb..30b9200a8 100644
--- a/vm/compiler/template/config-armv5te-vfp
+++ b/vm/compiler/template/config-armv5te-vfp
@@ -48,6 +48,7 @@ op-start armv5te-vfp
     op TEMPLATE_INTERPRET armv5te
     op TEMPLATE_MONITOR_ENTER armv5te
     op TEMPLATE_MONITOR_ENTER_DEBUG armv5te
+    op TEMPLATE_PERIODIC_PROFILING armv5te
 op-end
 
diff --git a/vm/compiler/template/config-armv7-a b/vm/compiler/template/config-armv7-a
index be7af31e4..1408ca117 100644
--- a/vm/compiler/template/config-armv7-a
+++ b/vm/compiler/template/config-armv7-a
@@ -48,6 +48,7 @@ op-start armv5te-vfp
     op TEMPLATE_INTERPRET armv5te
     op TEMPLATE_MONITOR_ENTER armv5te
     op TEMPLATE_MONITOR_ENTER_DEBUG armv5te
+    op TEMPLATE_PERIODIC_PROFILING armv5te
 op-end
 
 # "helper" code for C; include if you use any of the C stubs (this generates
diff --git a/vm/compiler/template/config-armv7-a-neon b/vm/compiler/template/config-armv7-a-neon
index be7af31e4..1408ca117 100644
--- a/vm/compiler/template/config-armv7-a-neon
+++ b/vm/compiler/template/config-armv7-a-neon
@@ -48,6 +48,7 @@ op-start armv5te-vfp
     op TEMPLATE_INTERPRET armv5te
     op TEMPLATE_MONITOR_ENTER armv5te
     op TEMPLATE_MONITOR_ENTER_DEBUG armv5te
+    op TEMPLATE_PERIODIC_PROFILING armv5te
 op-end
 
 # "helper" code for C; include if you use any of the C stubs (this generates
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
index 8efbcaa60..a107b24de 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
@@ -1473,6 +1473,38 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
 #endif
     ldr    pc, .LdvmJitToInterpNoChain
 
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
+dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
+/* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
+    /*
+     * Increment profile counter for this trace, and decrement
+     * the sample countdown.  If the countdown goes below zero, turn
+     * off profiling.
+     *
+     * On entry
+     * (lr-11) is address of pointer to counter.  Note: the counter
+     * actually exists 10 bytes before the return target, but because
+     * we are arriving from thumb mode, lr will have its low bit set.
+     */
+     ldr    r0, [lr,#-11]
+     ldr    r1, [rGLUE, #offGlue_pProfileCountdown]
+     ldr    r2, [r0]                    @ get counter
+     ldr    r3, [r1]                    @ get countdown timer
+     add    r2, #1
+     subs   r3, #1
+     blt    .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
+     str    r2, [r0]
+     str    r3, [r1]
+     bx     lr
+
+.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
+     mov    r4, lr                      @ preserve lr
+     ldr    r0, .LdvmJitTraceProfilingOff
+     blx    r0
+     bx     r4
+
     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
 /* File: armv5te/footer.S */
 /*
@@ -1581,6 +1613,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
     .word dvmMterpCommonExceptionThrown
 .LdvmLockObject:
     .word dvmLockObject
+.LdvmJitTraceProfilingOff:
+    .word dvmJitTraceProfilingOff
 #if defined(WITH_JIT_TUNING)
 .LdvmICHitCount:
     .word gDvmICHitCount
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
index 0df3ae65a..a6a0e9ff8 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S
@@ -1204,6 +1204,38 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
 #endif
     ldr    pc, .LdvmJitToInterpNoChain
 
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
+dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
+/* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
+    /*
+     * Increment profile counter for this trace, and decrement
+     * the sample countdown.  If the countdown goes below zero, turn
+     * off profiling.
+     *
+     * On entry
+     * (lr-11) is address of pointer to counter.  Note: the counter
+     * actually exists 10 bytes before the return target, but because
+     * we are arriving from thumb mode, lr will have its low bit set.
+     */
+     ldr    r0, [lr,#-11]
+     ldr    r1, [rGLUE, #offGlue_pProfileCountdown]
+     ldr    r2, [r0]                    @ get counter
+     ldr    r3, [r1]                    @ get countdown timer
+     add    r2, #1
+     subs   r3, #1
+     blt    .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
+     str    r2, [r0]
+     str    r3, [r1]
+     bx     lr
+
+.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
+     mov    r4, lr                      @ preserve lr
+     ldr    r0, .LdvmJitTraceProfilingOff
+     blx    r0
+     bx     r4
+
     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
 /* File: armv5te/footer.S */
 /*
@@ -1312,6 +1344,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
     .word dvmMterpCommonExceptionThrown
 .LdvmLockObject:
     .word dvmLockObject
+.LdvmJitTraceProfilingOff:
+    .word dvmJitTraceProfilingOff
 #if defined(WITH_JIT_TUNING)
 .LdvmICHitCount:
     .word gDvmICHitCount
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
index ee3f8cbec..e4ed30bef 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S
@@ -1473,6 +1473,38 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
 #endif
     ldr    pc, .LdvmJitToInterpNoChain
 
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
+dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
+/* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
+    /*
+     * Increment profile counter for this trace, and decrement
+     * the sample countdown.  If the countdown goes below zero, turn
+     * off profiling.
+     *
+     * On entry
+     * (lr-11) is address of pointer to counter.  Note: the counter
+     * actually exists 10 bytes before the return target, but because
+     * we are arriving from thumb mode, lr will have its low bit set.
+     */
+     ldr    r0, [lr,#-11]
+     ldr    r1, [rGLUE, #offGlue_pProfileCountdown]
+     ldr    r2, [r0]                    @ get counter
+     ldr    r3, [r1]                    @ get countdown timer
+     add    r2, #1
+     subs   r3, #1
+     blt    .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
+     str    r2, [r0]
+     str    r3, [r1]
+     bx     lr
+
+.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
+     mov    r4, lr                      @ preserve lr
+     ldr    r0, .LdvmJitTraceProfilingOff
+     blx    r0
+     bx     r4
+
     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
 /* File: armv5te/footer.S */
 /*
@@ -1581,6 +1613,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
     .word dvmMterpCommonExceptionThrown
 .LdvmLockObject:
     .word dvmLockObject
+.LdvmJitTraceProfilingOff:
+    .word dvmJitTraceProfilingOff
 #if defined(WITH_JIT_TUNING)
 .LdvmICHitCount:
     .word gDvmICHitCount
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
index 3875f5a24..fc26b3a95 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
@@ -1473,6 +1473,38 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
 #endif
     ldr    pc, .LdvmJitToInterpNoChain
 
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
+dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
+/* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
+    /*
+     * Increment profile counter for this trace, and decrement
+     * the sample countdown.  If the countdown goes below zero, turn
+     * off profiling.
+     *
+     * On entry
+     * (lr-11) is address of pointer to counter.  Note: the counter
+     * actually exists 10 bytes before the return target, but because
+     * we are arriving from thumb mode, lr will have its low bit set.
+     */
+     ldr    r0, [lr,#-11]
+     ldr    r1, [rGLUE, #offGlue_pProfileCountdown]
+     ldr    r2, [r0]                    @ get counter
+     ldr    r3, [r1]                    @ get countdown timer
+     add    r2, #1
+     subs   r3, #1
+     blt    .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
+     str    r2, [r0]
+     str    r3, [r1]
+     bx     lr
+
+.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
+     mov    r4, lr                      @ preserve lr
+     ldr    r0, .LdvmJitTraceProfilingOff
+     blx    r0
+     bx     r4
+
     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
 /* File: armv5te/footer.S */
 /*
@@ -1581,6 +1613,8 @@ dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
     .word dvmMterpCommonExceptionThrown
 .LdvmLockObject:
     .word dvmLockObject
+.LdvmJitTraceProfilingOff:
+    .word dvmJitTraceProfilingOff
 #if defined(WITH_JIT_TUNING)
 .LdvmICHitCount:
     .word gDvmICHitCount
diff --git a/vm/interp/InterpDefs.h b/vm/interp/InterpDefs.h
index 3c0d2e39e..2a20bfc2e 100644
--- a/vm/interp/InterpDefs.h
+++ b/vm/interp/InterpDefs.h
@@ -161,18 +161,13 @@ typedef struct InterpState {
      */
     unsigned char**    ppJitProfTable; // Used to refresh pJitProfTable
     int                icRechainCount; // Count down to next rechain request
-    const void*        jitCacheStart;  // Code cache boundaries
-    const void*        jitCacheEnd;
-#endif
+    const void*        pProfileCountdown; // Address of profile countdown timer
 
-    bool        debugIsMethodEntry;    // used for method entry event triggers
-#if defined(WITH_TRACKREF_CHECKS)
-    int         debugTrackedRefStart;  // tracked refs from prior invocations
-#endif
-
-#if defined(WITH_JIT)
     struct JitToInterpEntries jitToInterpEntries;
 
+    const void*        jitCacheStart;  // Code cache boundaries
+    const void*        jitCacheEnd;
+
     int                currTraceRun;
     int                totalTraceLen;  // Number of Dalvik insts in trace
     const u2*          currTraceHead;  // Start of the trace we're building
@@ -185,6 +180,12 @@ typedef struct InterpState {
     double             calleeSave[JIT_CALLEE_SAVE_DOUBLE_COUNT];
 #endif
 
+    bool        debugIsMethodEntry;    // used for method entry event triggers
+#if defined(WITH_TRACKREF_CHECKS)
+    int         debugTrackedRefStart;  // tracked refs from prior invocations
+#endif
+
 } InterpState;
 
 /*
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index 8eb985639..5cfd28d3a 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -511,7 +511,7 @@ void dvmJitStats()
             LOGD("JIT: %d Translation chains, %d interp stubs",
                  gDvmJit.translationChains, stubs);
-            if (gDvmJit.profile) {
+            if (gDvmJit.profileMode == kTraceProfilingContinuous) {
                 dvmCompilerSortAndPrintTraceProfiles();
             }
         }
@@ -987,23 +987,30 @@ void* dvmJitGetCodeAddr(const u2* dPC)
     const u2* npc = gDvmJit.pJitEntryTable[idx].dPC;
     if (npc != NULL) {
         bool hideTranslation = dvmJitHideTranslation();
-
         if (npc == dPC) {
+            int offset = (gDvmJit.profileMode >= kTraceProfilingContinuous) ?
+                 0 : gDvmJit.pJitEntryTable[idx].u.info.profileOffset;
+            intptr_t codeAddress =
+                (intptr_t)gDvmJit.pJitEntryTable[idx].codeAddress;
 #if defined(WITH_JIT_TUNING)
             gDvmJit.addrLookupsFound++;
 #endif
-            return hideTranslation ?
-                NULL : gDvmJit.pJitEntryTable[idx].codeAddress;
+            return hideTranslation ? NULL : (void *)(codeAddress + offset);
         } else {
             int chainEndMarker = gDvmJit.jitTableSize;
             while (gDvmJit.pJitEntryTable[idx].u.info.chain != chainEndMarker) {
                 idx = gDvmJit.pJitEntryTable[idx].u.info.chain;
                 if (gDvmJit.pJitEntryTable[idx].dPC == dPC) {
+                    int offset = (gDvmJit.profileMode >=
+                        kTraceProfilingContinuous) ? 0 :
+                        gDvmJit.pJitEntryTable[idx].u.info.profileOffset;
+                    intptr_t codeAddress =
+                        (intptr_t)gDvmJit.pJitEntryTable[idx].codeAddress;
 #if defined(WITH_JIT_TUNING)
                     gDvmJit.addrLookupsFound++;
 #endif
-                    return hideTranslation ?
-                        NULL : gDvmJit.pJitEntryTable[idx].codeAddress;
+                    return hideTranslation ? NULL :
+                        (void *)(codeAddress + offset);
                 }
             }
         }
@@ -1019,9 +1026,16 @@ void* dvmJitGetCodeAddr(const u2* dPC)
  * NOTE: Once a codeAddress field transitions from initial state to
  * JIT'd code, it must not be altered without first halting all
  * threads.  This routine should only be called by the compiler
- * thread.
+ * thread.  We defer the setting of the profile prefix size until
+ * after the new code address is set to ensure that the prefix offset
+ * is never applied to the initial interpret-only translation.  All
+ * translations with non-zero profile prefixes will still be correct
+ * if entered as if the profile offset is 0, but the interpret-only
+ * template cannot handle a non-zero prefix.
 */
-void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set) {
+void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set,
+                       int profilePrefixSize)
+{
     JitEntryInfoUnion oldValue;
     JitEntryInfoUnion newValue;
     JitEntry *jitEntry = lookupAndAdd(dPC, false);
@@ -1035,6 +1049,8 @@ void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set) {
                              oldValue.infoWord, newValue.infoWord,
                              &jitEntry->u.infoWord) != 0);
     jitEntry->codeAddress = nPC;
+    newValue.info.profileOffset = profilePrefixSize;
+    jitEntry->u = newValue;
 }
 
 /*
@@ -1286,6 +1302,7 @@ bool dvmJitResizeJitTable( unsigned int size )
             p->u.info.chain = chain;
         }
     }
+
     dvmUnlockMutex(&gDvmJit.tableLock);
 
     free(pOldTable);
@@ -1306,6 +1323,15 @@ void dvmJitResetTable(void)
     unsigned int i;
 
     dvmLockMutex(&gDvmJit.tableLock);
+
+    /* Note: if there is a need to preserve any existing counts, do so here */
+    for (i=0; i < JIT_PROF_BLOCK_BUCKETS; i++) {
+        if (gDvmJit.pJitTraceProfCounters->buckets[i])
+            memset((void *) gDvmJit.pJitTraceProfCounters->buckets[i],
+                   0, sizeof(JitTraceCounter_t) * JIT_PROF_BLOCK_ENTRIES);
+    }
+    gDvmJit.pJitTraceProfCounters->next = 0;
+
     memset((void *) jitEntry, 0, sizeof(JitEntry) * size);
     for (i=0; i< size; i++) {
         jitEntry[i].u.info.chain = size;  /* Initialize chain termination */
@@ -1315,6 +1341,31 @@ void dvmJitResetTable(void)
     dvmUnlockMutex(&gDvmJit.tableLock);
 }
 
 /*
+ * Return the address of the next trace profile counter.  This address
+ * will be embedded in the generated code for the trace, and thus cannot
+ * change while the trace exists.
+ */
+JitTraceCounter_t *dvmJitNextTraceCounter()
+{
+    int idx = gDvmJit.pJitTraceProfCounters->next / JIT_PROF_BLOCK_ENTRIES;
+    int elem = gDvmJit.pJitTraceProfCounters->next % JIT_PROF_BLOCK_ENTRIES;
+    JitTraceCounter_t *res;
+    /* Lazily allocate blocks of counters */
+    if (!gDvmJit.pJitTraceProfCounters->buckets[idx]) {
+        JitTraceCounter_t *p =
+            (JitTraceCounter_t*) calloc(JIT_PROF_BLOCK_ENTRIES, sizeof(*p));
+        if (!p) {
+            LOGE("Failed to allocate block of trace profile counters");
+            dvmAbort();
+        }
+        gDvmJit.pJitTraceProfCounters->buckets[idx] = p;
+    }
+    res = &gDvmJit.pJitTraceProfCounters->buckets[idx][elem];
+    gDvmJit.pJitTraceProfCounters->next++;
+    return res;
+}
+
+/*
  * Float/double conversion requires clamping to min and max of integer form.  If
  * target doesn't support this normally, use these.
 */
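
[Reviewer note, not part of the change] The bucket arithmetic in
dvmJitNextTraceCounter() above, worked through: with JIT_PROF_BLOCK_ENTRIES ==
1024 (from vm/interp/Jit.h below), counter number 1500 lands in buckets[1][476],
and each 1024-counter block is calloc'ed on first touch:

    int idx  = 1500 / 1024;   /* == 1   (which block)         */
    int elem = 1500 % 1024;   /* == 476 (slot within block)   */
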
@@ -1346,4 +1397,33 @@ s8 dvmJitf2l(float f)
         return (s8)f;
 }
 
+/* Should only be called by the compiler thread */
+void dvmJitChangeProfileMode(TraceProfilingModes newState)
+{
+    if (gDvmJit.profileMode != newState) {
+        gDvmJit.profileMode = newState;
+        dvmJitUnchainAll();
+    }
+}
+
+void dvmJitTraceProfilingOn()
+{
+    if (gDvmJit.profileMode == kTraceProfilingPeriodicOff)
+        dvmCompilerWorkEnqueue(NULL, kWorkOrderProfileMode,
+                               (void*) kTraceProfilingPeriodicOn);
+    else if (gDvmJit.profileMode == kTraceProfilingDisabled)
+        dvmCompilerWorkEnqueue(NULL, kWorkOrderProfileMode,
+                               (void*) kTraceProfilingContinuous);
+}
+
+void dvmJitTraceProfilingOff()
+{
+    if (gDvmJit.profileMode == kTraceProfilingPeriodicOn)
+        dvmCompilerWorkEnqueue(NULL, kWorkOrderProfileMode,
+                               (void*) kTraceProfilingPeriodicOff);
+    else if (gDvmJit.profileMode == kTraceProfilingContinuous)
+        dvmCompilerWorkEnqueue(NULL, kWorkOrderProfileMode,
+                               (void*) kTraceProfilingDisabled);
+}
+
 #endif /* WITH_JIT */
diff --git a/vm/interp/Jit.h b/vm/interp/Jit.h
index 6101f54ae..dda14d2bf 100644
--- a/vm/interp/Jit.h
+++ b/vm/interp/Jit.h
@@ -83,6 +83,29 @@ static inline u4 dvmJitHash( const u2* p ) {
 }
 
 /*
+ * The width of the chain field in JitEntryInfo sets the upper
+ * bound on the number of translations.  Be careful if changing
+ * the size of JitEntry struct - the Dalvik PC to JitEntry
+ * hash functions have built-in knowledge of the size.
+ */
+#define JIT_ENTRY_CHAIN_WIDTH 2
+#define JIT_MAX_ENTRIES (1 << (JIT_ENTRY_CHAIN_WIDTH * 8))
+
+/*
+ * The trace profiling counters are allocated in blocks and individual
+ * counters must not move so long as any referencing trace exists.
+ */
+#define JIT_PROF_BLOCK_ENTRIES 1024
+#define JIT_PROF_BLOCK_BUCKETS (JIT_MAX_ENTRIES / JIT_PROF_BLOCK_ENTRIES)
+
+typedef s4 JitTraceCounter_t;
+
+typedef struct JitTraceProfCounters {
+    unsigned int           next;
+    JitTraceCounter_t      *buckets[JIT_PROF_BLOCK_BUCKETS];
+} JitTraceProfCounters;
+
+/*
  * Entries in the JIT's address lookup hash table.
  * Fields which may be updated by multiple threads packed into a
  * single 32-bit word to allow use of atomic update.
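
[Reviewer note, not part of the change] Net effect of the two entry points added
above, as a transition table (the actual flip happens on the compiler thread via
the kWorkOrderProfileMode case in CodegenDriver.c; all other states are no-ops):

    /* dvmJitTraceProfilingOn():  Disabled   -> Continuous, PeriodicOff -> PeriodicOn  */
    /* dvmJitTraceProfilingOff(): Continuous -> Disabled,   PeriodicOn  -> PeriodicOff */
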
@@ -94,7 +117,7 @@ typedef struct JitEntryInfo {
     unsigned int           inlineCandidate:1;
     unsigned int           profileEnabled:1;
     JitInstructionSetType  instructionSet:4;
-    unsigned int           unused:8;
+    unsigned int           profileOffset:8;
     u2                     chain;          /* Index of next in chain */
 } JitEntryInfo;
 
@@ -120,7 +143,12 @@ void dvmJitResetTable(void);
 struct JitEntry *dvmFindJitEntry(const u2* pc);
 s8 dvmJitd2l(double d);
 s8 dvmJitf2l(float f);
-void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set);
+void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set,
+                       int profilePrefixSize);
 void dvmJitAbortTraceSelect(InterpState* interpState);
+JitTraceCounter_t *dvmJitNextTraceCounter(void);
+void dvmJitTraceProfilingOff(void);
+void dvmJitTraceProfilingOn(void);
+void dvmJitChangeProfileMode(TraceProfilingModes newState);
 
 #endif /*_DALVIK_INTERP_JIT*/
diff --git a/vm/mterp/Mterp.c b/vm/mterp/Mterp.c
index 0cd5a1fb2..68218a3da 100644
--- a/vm/mterp/Mterp.c
+++ b/vm/mterp/Mterp.c
@@ -83,6 +83,7 @@ bool dvmMterpStd(Thread* self, InterpState* glue)
     glue->jitThreshold = gDvmJit.threshold;
     glue->jitCacheStart = gDvmJit.codeCache;
     glue->jitCacheEnd = (char*)gDvmJit.codeCache + gDvmJit.codeCacheSize;
+    glue->pProfileCountdown = &gDvmJit.profileCountdown;
 #endif
 #if defined(WITH_INLINE_PROFILING)
     /*
diff --git a/vm/mterp/common/asm-constants.h b/vm/mterp/common/asm-constants.h
index e5a8a0429..8af792914 100644
--- a/vm/mterp/common/asm-constants.h
+++ b/vm/mterp/common/asm-constants.h
@@ -107,8 +107,14 @@ MTERP_OFFSET(offGlue_jitResumeDPC,      MterpGlue, jitResumeDPC, 72)
 MTERP_OFFSET(offGlue_jitThreshold,      MterpGlue, jitThreshold, 76)
 MTERP_OFFSET(offGlue_ppJitProfTable,    MterpGlue, ppJitProfTable, 80)
 MTERP_OFFSET(offGlue_icRechainCount,    MterpGlue, icRechainCount, 84)
-MTERP_OFFSET(offGlue_jitCacheStart,     MterpGlue, jitCacheStart, 88)
-MTERP_OFFSET(offGlue_jitCacheEnd,       MterpGlue, jitCacheEnd, 92)
+MTERP_OFFSET(offGlue_pProfileCountdown, MterpGlue, pProfileCountdown, 88)
+#if defined(WITH_SELF_VERIFICATION)
+MTERP_OFFSET(offGlue_jitCacheStart,     MterpGlue, jitCacheStart, 124)
+MTERP_OFFSET(offGlue_jitCacheEnd,       MterpGlue, jitCacheEnd, 128)
+#else
+MTERP_OFFSET(offGlue_jitCacheStart,     MterpGlue, jitCacheStart, 120)
+MTERP_OFFSET(offGlue_jitCacheEnd,       MterpGlue, jitCacheEnd, 124)
+#endif
 #endif
 /* make sure all JValue union members are stored at the same offset */
 MTERP_OFFSET(offGlue_retval_z,          MterpGlue, retval.z, 8)