diff options
author | Bill Buzbee <buzbee@google.com> | 2010-05-13 13:02:53 -0700 |
---|---|---|
committer | buzbee <buzbee@google.com> | 2010-05-17 12:18:10 -0700 |
commit | bd0472480c6e876198fe19c4ffa22350c0ce57da (patch) | |
tree | 8b217d10bb8bc1349a244b93f0258cea194a0aaa /vm/compiler | |
parent | 18d0e3f43f0afd38693baaf74807c37ac9ef5ebe (diff) | |
download | android_dalvik-bd0472480c6e876198fe19c4ffa22350c0ce57da.tar.gz android_dalvik-bd0472480c6e876198fe19c4ffa22350c0ce57da.tar.bz2 android_dalvik-bd0472480c6e876198fe19c4ffa22350c0ce57da.zip |
JIT: Fix for [Issue 2675245] FRF40 monkey crash in jit-cache
The JIT's chaining mechanism suffered from a narrow window that
could result in i-cache inconsistency. One of the forms of chaining
cell consisted of a two 16-bit thumb instruction sequence. If a thread were
interrupted between the execution of those two instructions *and*
another thread picked that moment to convert that cell's
chained/unchained state, then bad things happen.
This CL alters the chain/unchain model somewhat to avoid this case.
Chainable chaining cells grow by 4 bytes each, and instead of rewriting
a 32-bit cell to chain/unchain, we switch between chained and unchained
state by [re]writing the first 16-bits of the cell as either a 16-bit
Thumb unconditional branch (unchained mode) or the first half of a
32-bit Thumb branch. The 2nd 16-bits of the cell will never change once
the cell moves from its inital state - thus avoiding the possibility of it
becoming inconsistent.
This adds a trivial execution penalty on the slow path, but will add
about a kByte of memory usage to a typical process.
Change-Id: Id8b99802e11386cfbab23da6abae10e2d9fc4065
Diffstat (limited to 'vm/compiler')
-rw-r--r-- | vm/compiler/Compiler.c | 2 | ||||
-rw-r--r-- | vm/compiler/Compiler.h | 1 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/ArmLIR.h | 3 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/Assemble.c | 72 | ||||
-rw-r--r-- | vm/compiler/codegen/arm/CodegenDriver.c | 38 | ||||
-rw-r--r-- | vm/compiler/template/armv5te/TEMPLATE_INTERPRET.S | 6 | ||||
-rw-r--r-- | vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S | 6 | ||||
-rw-r--r-- | vm/compiler/template/out/CompilerTemplateAsm-armv5te.S | 6 | ||||
-rw-r--r-- | vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S | 6 | ||||
-rw-r--r-- | vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S | 6 |
10 files changed, 83 insertions, 63 deletions
diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c index f003692e7..2f72ef531 100644 --- a/vm/compiler/Compiler.c +++ b/vm/compiler/Compiler.c @@ -615,7 +615,7 @@ static void *compilerThreadStart(void *arg) } if (aborted || !compileOK) { dvmCompilerArenaReset(); - work.result.codeAddress = gDvmJit.interpretTemplate; + work.result.codeAddress = dvmCompilerGetInterpretTemplate(); } else if (!work.result.discardResult) { dvmJitSetCodeAddr(work.pc, work.result.codeAddress, work.result.instructionSet); diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h index e42d98696..46f1799c0 100644 --- a/vm/compiler/Compiler.h +++ b/vm/compiler/Compiler.h @@ -195,4 +195,5 @@ void dvmCompilerDataFlowAnalysisDispatcher(struct CompilationUnit *cUnit, void dvmCompilerStateRefresh(void); JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc, const struct JitEntry *desc); +void *dvmCompilerGetInterpretTemplate(); #endif /* _DALVIK_VM_COMPILER */ diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h index f7704ade5..f580d6a4e 100644 --- a/vm/compiler/codegen/arm/ArmLIR.h +++ b/vm/compiler/codegen/arm/ArmLIR.h @@ -784,4 +784,7 @@ typedef struct ArmLIR { #define CHAIN_CELL_OFFSET_TAG 0xcdab +#define CHAIN_CELL_NORMAL_SIZE 12 +#define CHAIN_CELL_PREDICTED_SIZE 16 + #endif /* _DALVIK_VM_COMPILER_CODEGEN_ARM_ARMLIR_H */ diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c index 499e28730..05d311b33 100644 --- a/vm/compiler/codegen/arm/Assemble.c +++ b/vm/compiler/codegen/arm/Assemble.c @@ -1159,7 +1159,7 @@ static void matchSignatureBreakpoint(const CompilationUnit *cUnit, * | . . * | | | * | +----------------------------+ - * | | Chaining Cells | -> 8 bytes each, must be 4 byte aligned + * | | Chaining Cells | -> 12/16 bytes each, must be 4 byte aligned * | . . * | . . * | | | @@ -1395,10 +1395,18 @@ void* dvmJitChain(void* tgtAddr, u4* branchAddr) * mix Arm & Thumb[2] translations, the following code should be * generalized. */ - thumbTarget = (tgtAddr != gDvmJit.interpretTemplate); + thumbTarget = (tgtAddr != dvmCompilerGetInterpretTemplate()); newInst = assembleChainingBranch(branchOffset, thumbTarget); + /* + * The second half-word instruction of the chaining cell must + * either be a nop (which represents initial state), or is the + * same exact branch halfword that we are trying to install. + */ + assert( ((*branchAddr >> 16) == getSkeleton(kThumbOrr)) || + ((*branchAddr >> 16) == (newInst >> 16))); + *branchAddr = newInst; cacheflush((long)branchAddr, (long)branchAddr + 4, 0); UPDATE_CODE_CACHE_PATCHES(); @@ -1512,7 +1520,8 @@ const Method *dvmJitToPatchPredictedChain(const Method *method, * Compilation not made yet for the callee. Reset the counter to a small * value and come back to check soon. */ - if ((tgtAddr == 0) || ((void*)tgtAddr == gDvmJit.interpretTemplate)) { + if ((tgtAddr == 0) || + ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) { /* * Wait for a few invocations (currently set to be 16) before trying * to setup the chain again. @@ -1641,9 +1650,10 @@ u4* dvmJitUnchain(void* codeAddr) /* Get total count of chain cells */ for (i = 0, cellSize = 0; i < kChainingCellGap; i++) { if (i != kChainingCellInvokePredicted) { - cellSize += pChainCellCounts->u.count[i] * 2; + cellSize += pChainCellCounts->u.count[i] * (CHAIN_CELL_NORMAL_SIZE >> 2); } else { - cellSize += pChainCellCounts->u.count[i] * 4; + cellSize += pChainCellCounts->u.count[i] * + (CHAIN_CELL_PREDICTED_SIZE >> 2); } } @@ -1656,25 +1666,30 @@ u4* dvmJitUnchain(void* codeAddr) /* The cells are sorted in order - walk through them and reset */ for (i = 0; i < kChainingCellGap; i++) { - int elemSize = 2; /* Most chaining cell has two words */ + int elemSize = CHAIN_CELL_NORMAL_SIZE >> 2; /* In 32-bit words */ if (i == kChainingCellInvokePredicted) { - elemSize = 4; + elemSize = CHAIN_CELL_PREDICTED_SIZE >> 2; } for (j = 0; j < pChainCellCounts->u.count[i]; j++) { - int targetOffset; switch(i) { case kChainingCellNormal: - targetOffset = offsetof(InterpState, - jitToInterpEntries.dvmJitToInterpNormal); - break; case kChainingCellHot: case kChainingCellInvokeSingleton: - targetOffset = offsetof(InterpState, - jitToInterpEntries.dvmJitToInterpTraceSelect); + case kChainingCellBackwardBranch: + /* + * Replace the 1st half-word of the cell with an + * unconditional branch, leaving the 2nd half-word + * untouched. This avoids problems with a thread + * that is suspended between the two halves when + * this unchaining takes place. + */ + newInst = *pChainCells; + newInst &= 0xFFFF0000; + newInst |= getSkeleton(kThumbBUncond); /* b offset is 0 */ + *pChainCells = newInst; break; case kChainingCellInvokePredicted: - targetOffset = 0; predChainCell = (PredictedChainingCell *) pChainCells; /* * There could be a race on another mutator thread to use @@ -1685,37 +1700,12 @@ u4* dvmJitUnchain(void* codeAddr) */ predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT; break; -#if defined(WITH_SELF_VERIFICATION) - case kChainingCellBackwardBranch: - targetOffset = offsetof(InterpState, - jitToInterpEntries.dvmJitToInterpBackwardBranch); - break; -#elif defined(WITH_JIT_TUNING) - case kChainingCellBackwardBranch: - targetOffset = offsetof(InterpState, - jitToInterpEntries.dvmJitToInterpNormal); - break; -#endif default: - targetOffset = 0; // make gcc happy LOGE("Unexpected chaining type: %d", i); dvmAbort(); // dvmAbort OK here - can't safely recover } COMPILER_TRACE_CHAINING( LOGD("Jit Runtime: unchaining 0x%x", (int)pChainCells)); - /* - * Thumb code sequence for a chaining cell is: - * ldr r0, rGLUE, #<word offset> - * blx r0 - */ - if (i != kChainingCellInvokePredicted) { - targetOffset = targetOffset >> 2; /* convert to word offset */ - thumb1 = 0x6800 | (targetOffset << 6) | - (rGLUE << 3) | (r0 << 0); - thumb2 = 0x4780 | (r0 << 3); - newInst = thumb2<<16 | thumb1; - *pChainCells = newInst; - } pChainCells += elemSize; /* Advance by a fixed number of words */ } } @@ -1735,7 +1725,7 @@ void dvmJitUnchainAll() if (gDvmJit.pJitEntryTable[i].dPC && gDvmJit.pJitEntryTable[i].codeAddress && (gDvmJit.pJitEntryTable[i].codeAddress != - gDvmJit.interpretTemplate)) { + dvmCompilerGetInterpretTemplate())) { u4* lastAddress; lastAddress = dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress); @@ -1797,7 +1787,7 @@ static int dumpTraceProfile(JitEntry *p, bool silent, bool reset, LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", (int)traceBase); return 0; } - if (p->codeAddress == gDvmJit.interpretTemplate) { + if (p->codeAddress == dvmCompilerGetInterpretTemplate()) { if (!silent) LOGD("TRACEPROFILE 0x%08x 0 INTERPRET_ONLY 0 0", (int)traceBase); return 0; diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c index 2cd16a1c0..515e8afa9 100644 --- a/vm/compiler/codegen/arm/CodegenDriver.c +++ b/vm/compiler/codegen/arm/CodegenDriver.c @@ -2501,10 +2501,10 @@ static bool handleFmt23x(CompilationUnit *cUnit, MIR *mir) * blx &findPackedSwitchIndex * mov pc, r0 * .align4 - * chaining cell for case 0 [8 bytes] - * chaining cell for case 1 [8 bytes] + * chaining cell for case 0 [12 bytes] + * chaining cell for case 1 [12 bytes] * : - * chaining cell for case MIN(size, MAX_CHAINED_SWITCH_CASES)-1 [8 bytes] + * chaining cell for case MIN(size, MAX_CHAINED_SWITCH_CASES)-1 [12 bytes] * chaining cell for case default [8 bytes] * noChain exit */ @@ -2555,7 +2555,7 @@ static s8 findPackedSwitchIndex(const u2* switchData, int testVal, int pc) jumpIndex = index; } - chainingPC += jumpIndex * 8; + chainingPC += jumpIndex * CHAIN_CELL_NORMAL_SIZE; return (((s8) caseDPCOffset) << 32) | (u8) chainingPC; } @@ -2606,13 +2606,14 @@ static s8 findSparseSwitchIndex(const u2* switchData, int testVal, int pc) /* MAX_CHAINED_SWITCH_CASES + 1 is the start of the overflow case */ int jumpIndex = (i < MAX_CHAINED_SWITCH_CASES) ? i : MAX_CHAINED_SWITCH_CASES + 1; - chainingPC += jumpIndex * 8; + chainingPC += jumpIndex * CHAIN_CELL_NORMAL_SIZE; return (((s8) entries[i]) << 32) | (u8) chainingPC; } else if (k > testVal) { break; } } - return chainingPC + MIN(size, MAX_CHAINED_SWITCH_CASES) * 8; + return chainingPC + MIN(size, MAX_CHAINED_SWITCH_CASES) * + CHAIN_CELL_NORMAL_SIZE; } static bool handleFmt31t(CompilationUnit *cUnit, MIR *mir) @@ -3317,6 +3318,27 @@ static bool handleFmt51l(CompilationUnit *cUnit, MIR *mir) * Dalvik PC and special-purpose registers are reconstructed here. */ +/* + * Insert a + * b .+4 + * nop + * pair at the beginning of a chaining cell. This serves as the + * switch branch that selects between reverting to the interpreter or + * not. Once the cell is chained to a translation, the cell will + * contain a 32-bit branch. Subsequent chain/unchain operations will + * then only alter that first 16-bits - the "b .+4" for unchaining, + * and the restoration of the first half of the 32-bit branch for + * rechaining. + */ +static void insertChainingSwitch(CompilationUnit *cUnit) +{ + ArmLIR *branch = newLIR0(cUnit, kThumbBUncond); + newLIR2(cUnit, kThumbOrr, r0, r0); + ArmLIR *target = newLIR0(cUnit, kArmPseudoTargetLabel); + target->defMask = ENCODE_ALL; + branch->generic.target = (LIR *) target; +} + /* Chaining cell for code that may need warmup. */ static void handleNormalChainingCell(CompilationUnit *cUnit, unsigned int offset) @@ -3325,6 +3347,7 @@ static void handleNormalChainingCell(CompilationUnit *cUnit, * Use raw instruction constructors to guarantee that the generated * instructions fit the predefined cell size. */ + insertChainingSwitch(cUnit); newLIR3(cUnit, kThumbLdrRRI5, r0, rGLUE, offsetof(InterpState, jitToInterpEntries.dvmJitToInterpNormal) >> 2); @@ -3343,6 +3366,7 @@ static void handleHotChainingCell(CompilationUnit *cUnit, * Use raw instruction constructors to guarantee that the generated * instructions fit the predefined cell size. */ + insertChainingSwitch(cUnit); newLIR3(cUnit, kThumbLdrRRI5, r0, rGLUE, offsetof(InterpState, jitToInterpEntries.dvmJitToInterpTraceSelect) >> 2); @@ -3359,6 +3383,7 @@ static void handleBackwardBranchChainingCell(CompilationUnit *cUnit, * Use raw instruction constructors to guarantee that the generated * instructions fit the predefined cell size. */ + insertChainingSwitch(cUnit); #if defined(WITH_SELF_VERIFICATION) newLIR3(cUnit, kThumbLdrRRI5, r0, rGLUE, offsetof(InterpState, @@ -3380,6 +3405,7 @@ static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit, * Use raw instruction constructors to guarantee that the generated * instructions fit the predefined cell size. */ + insertChainingSwitch(cUnit); newLIR3(cUnit, kThumbLdrRRI5, r0, rGLUE, offsetof(InterpState, jitToInterpEntries.dvmJitToInterpTraceSelect) >> 2); diff --git a/vm/compiler/template/armv5te/TEMPLATE_INTERPRET.S b/vm/compiler/template/armv5te/TEMPLATE_INTERPRET.S index 548440042..2b0c730b7 100644 --- a/vm/compiler/template/armv5te/TEMPLATE_INTERPRET.S +++ b/vm/compiler/template/armv5te/TEMPLATE_INTERPRET.S @@ -3,17 +3,17 @@ * any lookups. It may be called either as part of a normal chaining * operation, or from the transition code in header.S. We distinquish * the two cases by looking at the link register. If called from a - * translation chain, it will point to the chaining Dalvik PC + 1. + * translation chain, it will point to the chaining Dalvik PC -3. * On entry: * lr - if NULL: * r1 - the Dalvik PC to begin interpretation. * else - * [lr, #-1] contains Dalvik PC to begin interpretation + * [lr, #3] contains Dalvik PC to begin interpretation * rGLUE - pointer to interpState * rFP - Dalvik frame pointer */ cmp lr, #0 - ldrne r1,[lr, #-1] + ldrne r1,[lr, #3] ldr r2, .LinterpPunt mov r0, r1 @ set Dalvik PC bx r2 diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S index b2a499862..2f92ebf70 100644 --- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S +++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S @@ -1354,17 +1354,17 @@ dvmCompiler_TEMPLATE_INTERPRET: * any lookups. It may be called either as part of a normal chaining * operation, or from the transition code in header.S. We distinquish * the two cases by looking at the link register. If called from a - * translation chain, it will point to the chaining Dalvik PC + 1. + * translation chain, it will point to the chaining Dalvik PC -3. * On entry: * lr - if NULL: * r1 - the Dalvik PC to begin interpretation. * else - * [lr, #-1] contains Dalvik PC to begin interpretation + * [lr, #3] contains Dalvik PC to begin interpretation * rGLUE - pointer to interpState * rFP - Dalvik frame pointer */ cmp lr, #0 - ldrne r1,[lr, #-1] + ldrne r1,[lr, #3] ldr r2, .LinterpPunt mov r0, r1 @ set Dalvik PC bx r2 diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S index cc7455fe6..de58c23dc 100644 --- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S +++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te.S @@ -1082,17 +1082,17 @@ dvmCompiler_TEMPLATE_INTERPRET: * any lookups. It may be called either as part of a normal chaining * operation, or from the transition code in header.S. We distinquish * the two cases by looking at the link register. If called from a - * translation chain, it will point to the chaining Dalvik PC + 1. + * translation chain, it will point to the chaining Dalvik PC -3. * On entry: * lr - if NULL: * r1 - the Dalvik PC to begin interpretation. * else - * [lr, #-1] contains Dalvik PC to begin interpretation + * [lr, #3] contains Dalvik PC to begin interpretation * rGLUE - pointer to interpState * rFP - Dalvik frame pointer */ cmp lr, #0 - ldrne r1,[lr, #-1] + ldrne r1,[lr, #3] ldr r2, .LinterpPunt mov r0, r1 @ set Dalvik PC bx r2 diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S index 5692b1156..4142099b5 100644 --- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S +++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a-neon.S @@ -1354,17 +1354,17 @@ dvmCompiler_TEMPLATE_INTERPRET: * any lookups. It may be called either as part of a normal chaining * operation, or from the transition code in header.S. We distinquish * the two cases by looking at the link register. If called from a - * translation chain, it will point to the chaining Dalvik PC + 1. + * translation chain, it will point to the chaining Dalvik PC -3. * On entry: * lr - if NULL: * r1 - the Dalvik PC to begin interpretation. * else - * [lr, #-1] contains Dalvik PC to begin interpretation + * [lr, #3] contains Dalvik PC to begin interpretation * rGLUE - pointer to interpState * rFP - Dalvik frame pointer */ cmp lr, #0 - ldrne r1,[lr, #-1] + ldrne r1,[lr, #3] ldr r2, .LinterpPunt mov r0, r1 @ set Dalvik PC bx r2 diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S index b75471fdf..8148f4d18 100644 --- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S +++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S @@ -1354,17 +1354,17 @@ dvmCompiler_TEMPLATE_INTERPRET: * any lookups. It may be called either as part of a normal chaining * operation, or from the transition code in header.S. We distinquish * the two cases by looking at the link register. If called from a - * translation chain, it will point to the chaining Dalvik PC + 1. + * translation chain, it will point to the chaining Dalvik PC -3. * On entry: * lr - if NULL: * r1 - the Dalvik PC to begin interpretation. * else - * [lr, #-1] contains Dalvik PC to begin interpretation + * [lr, #3] contains Dalvik PC to begin interpretation * rGLUE - pointer to interpState * rFP - Dalvik frame pointer */ cmp lr, #0 - ldrne r1,[lr, #-1] + ldrne r1,[lr, #3] ldr r2, .LinterpPunt mov r0, r1 @ set Dalvik PC bx r2 |