diff options
-rw-r--r-- | vm/compiler/Compiler.c | 7 | ||||
-rw-r--r-- | vm/compiler/Compiler.h | 22 | ||||
-rw-r--r-- | vm/compiler/CompilerIR.h | 1 | ||||
-rw-r--r-- | vm/compiler/Frontend.c | 26 | ||||
-rw-r--r-- | vm/compiler/codegen/CompilerCodegen.h | 9 | ||||
-rw-r--r-- | vm/compiler/codegen/armv5te/ArchUtility.c | 2 | ||||
-rw-r--r-- | vm/compiler/codegen/armv5te/Armv5teLIR.h | 2 | ||||
-rw-r--r-- | vm/compiler/codegen/armv5te/Assemble.c | 296 | ||||
-rw-r--r-- | vm/compiler/codegen/armv5te/Codegen.c | 12 | ||||
-rw-r--r-- | vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.c | 9 | ||||
-rw-r--r-- | vm/compiler/codegen/armv5te/armv5te/ArchVariant.c | 9 | ||||
-rw-r--r-- | vm/interp/Jit.c | 443 | ||||
-rw-r--r-- | vm/interp/Jit.h | 28 |
13 files changed, 475 insertions, 391 deletions
diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c index f70b9787b..203a08020 100644 --- a/vm/compiler/Compiler.c +++ b/vm/compiler/Compiler.c @@ -118,7 +118,6 @@ static void *compilerThreadStart(void *arg) continue; } else { do { - void *compiledCodePtr; CompilerWorkOrder work = workDequeue(); dvmUnlockMutex(&gDvmJit.compilerLock); /* Check whether there is a suspend request on me */ @@ -131,10 +130,10 @@ static void *compilerThreadStart(void *arg) if (gDvmJit.haltCompilerThread) { LOGD("Compiler shutdown in progress - discarding request"); } else { - compiledCodePtr = dvmCompilerDoWork(&work); /* Compilation is successful */ - if (compiledCodePtr) { - dvmJitSetCodeAddr(work.pc, compiledCodePtr); + if (dvmCompilerDoWork(&work)) { + dvmJitSetCodeAddr(work.pc, work.result.codeAddress, + work.result.instructionSet); } } free(work.info); diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h index 093d48ace..2cd112bb3 100644 --- a/vm/compiler/Compiler.h +++ b/vm/compiler/Compiler.h @@ -25,6 +25,21 @@ #define COMPILER_TRACEE(X) #define COMPILER_TRACE_CHAINING(X) +typedef enum JitInstructionSetType { + DALVIK_JIT_NONE = 0, + DALVIK_JIT_ARM, + DALVIK_JIT_THUMB, + DALVIK_JIT_THUMB2, + DALVIK_JIT_THUMBEE, + DALVIK_JIT_X86 +} JitInstructionSetType; + +/* Description of a compiled trace. 
*/ +typedef struct JitTranslationInfo { + void *codeAddress; + JitInstructionSetType instructionSet; +} JitTranslationInfo; + typedef enum WorkOrderKind { kWorkOrderInvalid = 0, // Should never see by the backend kWorkOrderMethod = 1, // Work is to compile a whole method @@ -35,6 +50,7 @@ typedef struct CompilerWorkOrder { const u2* pc; WorkOrderKind kind; void* info; + JitTranslationInfo result; } CompilerWorkOrder; typedef enum JitState { @@ -98,10 +114,12 @@ bool dvmCompilerStartup(void); void dvmCompilerShutdown(void); bool dvmCompilerWorkEnqueue(const u2* pc, WorkOrderKind kind, void* info); void *dvmCheckCodeCache(void *method); -void *dvmCompileMethod(const Method *method); -void *dvmCompileTrace(JitTraceDescription *trace, int numMaxInsts); +bool dvmCompileMethod(const Method *method, JitTranslationInfo *info); +bool dvmCompileTrace(JitTraceDescription *trace, int numMaxInsts, + JitTranslationInfo *info); void dvmCompilerDumpStats(void); void dvmCompilerDrainQueue(void); void dvmJitUnchainAll(void); +void dvmCompilerSortAndPrintTraceProfiles(void); #endif /* _DALVIK_VM_COMPILER */ diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h index 201b0cc5d..d61d2eeff 100644 --- a/vm/compiler/CompilerIR.h +++ b/vm/compiler/CompilerIR.h @@ -91,6 +91,7 @@ typedef struct CompilationUnit { LIR *firstChainingLIR[CHAINING_CELL_LAST]; RegisterScoreboard registerScoreboard; // Track register dependency int optRound; // round number to tell an LIR's age + JitInstructionSetType instructionSet; } CompilationUnit; BasicBlock *dvmCompilerNewBB(BBType blockType); diff --git a/vm/compiler/Frontend.c b/vm/compiler/Frontend.c index 77548d94b..c71797557 100644 --- a/vm/compiler/Frontend.c +++ b/vm/compiler/Frontend.c @@ -239,7 +239,8 @@ static CompilerMethodStats *analyzeMethodBody(const Method *method) * first and they will be passed to the codegen routines to convert Dalvik * bytecode into machine code. 
*/ -void *dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts) +bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts, + JitTranslationInfo *info) { const DexCode *dexCode = dvmGetMethodCode(desc->method); const JitTraceRun* currRun = &desc->trace[0]; @@ -523,11 +524,14 @@ void *dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts) dvmCompilerDumpCompilationUnit(&cUnit); } + /* Set the instruction set to use (NOTE: later components may change it) */ + cUnit.instructionSet = dvmCompilerInstructionSet(&cUnit); + /* Convert MIR to LIR, etc. */ dvmCompilerMIR2LIR(&cUnit); /* Convert LIR into machine code. */ - dvmCompilerAssembleLIR(&cUnit); + dvmCompilerAssembleLIR(&cUnit, info); if (cUnit.printMe) { if (cUnit.halveInstCount) { @@ -546,17 +550,14 @@ void *dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts) /* Free the bit vector tracking null-checked registers */ dvmFreeBitVector(cUnit.registerScoreboard.nullCheckedRegs); - /* - * Things have gone smoothly - publish the starting address of - * translation's entry point. - */ if (!cUnit.halveInstCount) { + /* Success */ methodStats->nativeSize += cUnit.totalSize; - return cUnit.baseAddr + cUnit.headerSize; + return info->codeAddress != NULL; /* Halve the instruction count and retry again */ } else { - return dvmCompileTrace(desc, cUnit.numInsts / 2); + return dvmCompileTrace(desc, cUnit.numInsts / 2, info); } } @@ -567,7 +568,7 @@ void *dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts) * TODO: implementation will be revisited when the trace builder can provide * whole-method traces. 
*/ -void *dvmCompileMethod(const Method *method) +bool dvmCompileMethod(const Method *method, JitTranslationInfo *info) { const DexCode *dexCode = dvmGetMethodCode(method); const u2 *codePtr = dexCode->insns; @@ -732,13 +733,16 @@ void *dvmCompileMethod(const Method *method) } } + /* Set the instruction set to use (NOTE: later components may change it) */ + cUnit.instructionSet = dvmCompilerInstructionSet(&cUnit); + dvmCompilerMIR2LIR(&cUnit); - dvmCompilerAssembleLIR(&cUnit); + dvmCompilerAssembleLIR(&cUnit, info); dvmCompilerDumpCompilationUnit(&cUnit); dvmCompilerArenaReset(); - return cUnit.baseAddr + cUnit.headerSize; + return info->codeAddress != NULL; } diff --git a/vm/compiler/codegen/CompilerCodegen.h b/vm/compiler/codegen/CompilerCodegen.h index 97077b415..c9e6bd6df 100644 --- a/vm/compiler/codegen/CompilerCodegen.h +++ b/vm/compiler/codegen/CompilerCodegen.h @@ -20,18 +20,23 @@ #define _DALVIK_VM_COMPILERCODEGEN_H_ /* Work unit is architecture dependent */ -void *dvmCompilerDoWork(CompilerWorkOrder *work); +bool dvmCompilerDoWork(CompilerWorkOrder *work); /* Lower middle-level IR to low-level IR */ void dvmCompilerMIR2LIR(CompilationUnit *cUnit); /* Assemble LIR into machine code */ -void dvmCompilerAssembleLIR(CompilationUnit *cUnit); +void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info); /* Implemented in the codegen/<target>/ArchUtility.c */ void dvmCompilerCodegenDump(CompilationUnit *cUnit); /* Implemented in the codegen/<target>/Assembler.c */ void* dvmJitChain(void *tgtAddr, u4* branchAddr); +u4* dvmJitUnchain(void *codeAddr); +void dvmJitUnchainAll(void); + +/* Implemented in codegen/<target>/<target_variant>/ArchVariant.c */ +JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit); #endif /* _DALVIK_VM_COMPILERCODEGEN_H_ */ diff --git a/vm/compiler/codegen/armv5te/ArchUtility.c b/vm/compiler/codegen/armv5te/ArchUtility.c index e45c57291..d7a0ce62c 100644 --- a/vm/compiler/codegen/armv5te/ArchUtility.c +++ 
b/vm/compiler/codegen/armv5te/ArchUtility.c @@ -224,7 +224,7 @@ void dvmCompilerCodegenDump(CompilationUnit *cUnit) for (lirInsn = cUnit->wordList; lirInsn; lirInsn = lirInsn->next) { armLIR = (Armv5teLIR *) lirInsn; LOGD("%p (%04x): .word (0x%x)\n", - cUnit->baseAddr + armLIR->generic.offset, armLIR->generic.offset, + (char*)cUnit->baseAddr + armLIR->generic.offset, armLIR->generic.offset, armLIR->operands[0]); } } diff --git a/vm/compiler/codegen/armv5te/Armv5teLIR.h b/vm/compiler/codegen/armv5te/Armv5teLIR.h index 709b95fe3..6408038b2 100644 --- a/vm/compiler/codegen/armv5te/Armv5teLIR.h +++ b/vm/compiler/codegen/armv5te/Armv5teLIR.h @@ -179,6 +179,7 @@ typedef struct Armv5teEncodingMap { int flags; char *name; char* fmt; + int size; } Armv5teEncodingMap; extern Armv5teEncodingMap EncodingMap[ARMV5TE_LAST]; @@ -194,6 +195,7 @@ typedef struct Armv5teLIR { int operands[3]; // [0..2] = [dest, src1, src2] bool isNop; // LIR is optimized away int age; // default is 0, set lazily by the optimizer + int size; // 16-bit unit size (1 for thumb, 1 or 2 for thumb2) } Armv5teLIR; /* Chain cell for predicted method invocation */ diff --git a/vm/compiler/codegen/armv5te/Assemble.c b/vm/compiler/codegen/armv5te/Assemble.c index 3b3c161d0..f2f6e8c53 100644 --- a/vm/compiler/codegen/armv5te/Assemble.c +++ b/vm/compiler/codegen/armv5te/Assemble.c @@ -36,9 +36,9 @@ * fmt: for pretty-prining */ #define ENCODING_MAP(opcode, skeleton, ds, de, s1s, s1e, s2s, s2e, operands, \ - name, fmt) \ + name, fmt, size) \ {skeleton, {{ds, de}, {s1s, s1e}, {s2s, s2e}}, opcode, operands, name, \ - fmt} + fmt, size} /* Instruction dump string format keys: !pf, where "!" 
is the start * of the key, "p" is which numeric operand to use and "f" is the @@ -67,224 +67,224 @@ Armv5teEncodingMap EncodingMap[ARMV5TE_LAST] = { ENCODING_MAP(ARMV5TE_16BIT_DATA, 0x0000, 15, 0, -1, -1, -1, -1, IS_UNARY_OP, - "data", "0x!0h(!0d)"), + "data", "0x!0h(!0d)", 1), ENCODING_MAP(ARMV5TE_ADC, 0x4140, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "adc", "r!0d, r!1d"), + "adc", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_ADD_RRI3, 0x1c00, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "add", "r!0d, r!1d, #!2d"), + "add", "r!0d, r!1d, #!2d", 1), ENCODING_MAP(ARMV5TE_ADD_RI8, 0x3000, 10, 8, 7, 0, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "add", "r!0d, r!0d, #!1d"), + "add", "r!0d, r!0d, #!1d", 1), ENCODING_MAP(ARMV5TE_ADD_RRR, 0x1800, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "add", "r!0d, r!1d, r!2d"), + "add", "r!0d, r!1d, r!2d", 1), ENCODING_MAP(ARMV5TE_ADD_RR_LH, 0x4440, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "add", - "r!0d, r!1d"), + "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_ADD_RR_HL, 0x4480, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "add", "r!0d, r!1d"), + "add", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_ADD_RR_HH, 0x44c0, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "add", "r!0d, r!1d"), + "add", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_ADD_PC_REL, 0xa000, 10, 8, 7, 0, -1, -1, IS_TERTIARY_OP | CLOBBER_DEST, - "add", "r!0d, pc, #!1E"), + "add", "r!0d, pc, #!1E", 1), ENCODING_MAP(ARMV5TE_ADD_SP_REL, 0xa800, 10, 8, 7, 0, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "add", "r!0d, sp, #!1E"), + "add", "r!0d, sp, #!1E", 1), ENCODING_MAP(ARMV5TE_ADD_SPI7, 0xb000, 6, 0, -1, -1, -1, -1, IS_UNARY_OP | CLOBBER_DEST, - "add", "sp, #!0d*4"), + "add", "sp, #!0d*4", 1), ENCODING_MAP(ARMV5TE_AND_RR, 0x4000, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "and", "r!0d, r!1d"), + "and", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_ASR, 0x1000, 2, 0, 5, 3, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "asr", "r!0d, r!1d, #!2d"), + "asr", "r!0d, r!1d, #!2d", 
1), ENCODING_MAP(ARMV5TE_ASRV, 0x4100, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "asr", "r!0d, r!1d"), + "asr", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_B_COND, 0xd000, 7, 0, 11, 8, -1, -1, IS_BINARY_OP | IS_BRANCH, - "!1c", "!0t"), + "!1c", "!0t", 1), ENCODING_MAP(ARMV5TE_B_UNCOND, 0xe000, 10, 0, -1, -1, -1, -1, NO_OPERAND | IS_BRANCH, - "b", "!0t"), + "b", "!0t", 1), ENCODING_MAP(ARMV5TE_BIC, 0x4380, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "bic", "r!0d, r!1d"), + "bic", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_BKPT, 0xbe00, 7, 0, -1, -1, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bkpt", "!0d"), + "bkpt", "!0d", 1), ENCODING_MAP(ARMV5TE_BLX_1, 0xf000, 10, 0, -1, -1, -1, -1, IS_BINARY_OP | IS_BRANCH, - "blx_1", "!0u"), + "blx_1", "!0u", 1), ENCODING_MAP(ARMV5TE_BLX_2, 0xe800, 10, 0, -1, -1, -1, -1, IS_BINARY_OP | IS_BRANCH, - "blx_2", "!0v"), + "blx_2", "!0v", 1), ENCODING_MAP(ARMV5TE_BL_1, 0xf000, 10, 0, -1, -1, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bl_1", "!0u"), + "bl_1", "!0u", 1), ENCODING_MAP(ARMV5TE_BL_2, 0xf800, 10, 0, -1, -1, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bl_2", "!0v"), + "bl_2", "!0v", 1), ENCODING_MAP(ARMV5TE_BLX_R, 0x4780, 6, 3, -1, -1, -1, -1, IS_UNARY_OP | IS_BRANCH, - "blx", "r!0d"), + "blx", "r!0d", 1), ENCODING_MAP(ARMV5TE_BX, 0x4700, 6, 3, -1, -1, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bx", "r!0d"), + "bx", "r!0d", 1), ENCODING_MAP(ARMV5TE_CMN, 0x42c0, 2, 0, 5, 3, -1, -1, IS_BINARY_OP, - "cmn", "r!0d, r!1d"), + "cmn", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_CMP_RI8, 0x2800, 10, 8, 7, 0, -1, -1, IS_BINARY_OP, - "cmp", "r!0d, #!1d"), + "cmp", "r!0d, #!1d", 1), ENCODING_MAP(ARMV5TE_CMP_RR, 0x4280, 2, 0, 5, 3, -1, -1, IS_BINARY_OP, - "cmp", "r!0d, r!1d"), + "cmp", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_CMP_LH, 0x4540, 2, 0, 5, 3, -1, -1, IS_BINARY_OP, - "cmp", "r!0d, r!1D"), + "cmp", "r!0d, r!1D", 1), ENCODING_MAP(ARMV5TE_CMP_HL, 0x4580, 2, 0, 5, 3, -1, -1, IS_BINARY_OP, - "cmp", "r!0D, r!1d"), + "cmp", "r!0D, r!1d", 1), 
ENCODING_MAP(ARMV5TE_CMP_HH, 0x45c0, 2, 0, 5, 3, -1, -1, IS_BINARY_OP, - "cmp", "r!0D, r!1D"), + "cmp", "r!0D, r!1D", 1), ENCODING_MAP(ARMV5TE_EOR, 0x4040, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "eor", "r!0d, r!1d"), + "eor", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_LDMIA, 0xc800, 10, 8, 7, 0, -1, -1, IS_BINARY_OP | CLOBBER_DEST | CLOBBER_SRC1, - "ldmia", "r!0d!!, <!1R>"), + "ldmia", "r!0d!!, <!1R>", 1), ENCODING_MAP(ARMV5TE_LDR_RRI5, 0x6800, 2, 0, 5, 3, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "ldr", "r!0d, [r!1d, #!2E]"), + "ldr", "r!0d, [r!1d, #!2E]", 1), ENCODING_MAP(ARMV5TE_LDR_RRR, 0x5800, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "ldr", "r!0d, [r!1d, r!2d]"), + "ldr", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(ARMV5TE_LDR_PC_REL, 0x4800, 10, 8, 7, 0, -1, -1, IS_TERTIARY_OP | CLOBBER_DEST, - "ldr", "r!0d, [pc, #!1E]"), + "ldr", "r!0d, [pc, #!1E]", 1), ENCODING_MAP(ARMV5TE_LDR_SP_REL, 0x9800, 10, 8, 7, 0, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "ldr", "r!0d, [sp, #!1E]"), + "ldr", "r!0d, [sp, #!1E]", 1), ENCODING_MAP(ARMV5TE_LDRB_RRI5, 0x7800, 2, 0, 5, 3, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "ldrb", "r!0d, [r!1d, #2d]"), + "ldrb", "r!0d, [r!1d, #2d]", 1), ENCODING_MAP(ARMV5TE_LDRB_RRR, 0x5c00, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "ldrb", "r!0d, [r!1d, r!2d]"), + "ldrb", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(ARMV5TE_LDRH_RRI5, 0x8800, 2, 0, 5, 3, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "ldrh", "r!0d, [r!1d, #!2F]"), + "ldrh", "r!0d, [r!1d, #!2F]", 1), ENCODING_MAP(ARMV5TE_LDRH_RRR, 0x5a00, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "ldrh", "r!0d, [r!1d, r!2d]"), + "ldrh", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(ARMV5TE_LDRSB_RRR, 0x5600, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "ldrsb", "r!0d, [r!1d, r!2d]"), + "ldrsb", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(ARMV5TE_LDRSH_RRR, 0x5e00, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "ldrsh", "r!0d, [r!1d, r!2d]"), + "ldrsh", "r!0d, [r!1d, r!2d]", 1), 
ENCODING_MAP(ARMV5TE_LSL, 0x0000, 2, 0, 5, 3, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "lsl", "r!0d, r!1d, #!2d"), + "lsl", "r!0d, r!1d, #!2d", 1), ENCODING_MAP(ARMV5TE_LSLV, 0x4080, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "lsl", "r!0d, r!1d"), + "lsl", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_LSR, 0x0800, 2, 0, 5, 3, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "lsr", "r!0d, r!1d, #!2d"), + "lsr", "r!0d, r!1d, #!2d", 1), ENCODING_MAP(ARMV5TE_LSRV, 0x40c0, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "lsr", "r!0d, r!1d"), + "lsr", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_MOV_IMM, 0x2000, 10, 8, 7, 0, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "mov", "r!0d, #!1d"), + "mov", "r!0d, #!1d", 1), ENCODING_MAP(ARMV5TE_MOV_RR, 0x1c00, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "mov", "r!0d, r!1d"), + "mov", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_MOV_RR_H2H, 0x46c0, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "mov", "r!0D, r!1D"), + "mov", "r!0D, r!1D", 1), ENCODING_MAP(ARMV5TE_MOV_RR_H2L, 0x4640, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "mov", "r!0d, r!1D"), + "mov", "r!0d, r!1D", 1), ENCODING_MAP(ARMV5TE_MOV_RR_L2H, 0x4680, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "mov", "r!0D, r!1d"), + "mov", "r!0D, r!1d", 1), ENCODING_MAP(ARMV5TE_MUL, 0x4340, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "mul", "r!0d, r!1d"), + "mul", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_MVN, 0x43c0, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "mvn", "r!0d, r!1d"), + "mvn", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_NEG, 0x4240, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "neg", "r!0d, r!1d"), + "neg", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_ORR, 0x4300, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "orr", "r!0d, r!1d"), + "orr", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_POP, 0xbc00, 8, 0, -1, -1, -1, -1, IS_UNARY_OP, - "pop", "<!0R>"), + "pop", "<!0R>", 1), ENCODING_MAP(ARMV5TE_PUSH, 0xb400, 8, 0, -1, -1, -1, -1, IS_UNARY_OP, - "push", "<!0R>"), 
+ "push", "<!0R>", 1), ENCODING_MAP(ARMV5TE_ROR, 0x41c0, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "ror", "r!0d, r!1d"), + "ror", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_SBC, 0x4180, 2, 0, 5, 3, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "sbc", "r!0d, r!1d"), + "sbc", "r!0d, r!1d", 1), ENCODING_MAP(ARMV5TE_STMIA, 0xc000, 10, 8, 7, 0, -1, -1, IS_BINARY_OP | CLOBBER_SRC1, - "stmia", "r!0d!!, <!1R>"), + "stmia", "r!0d!!, <!1R>", 1), ENCODING_MAP(ARMV5TE_STR_RRI5, 0x6000, 2, 0, 5, 3, 10, 6, IS_TERTIARY_OP, - "str", "r!0d, [r!1d, #!2E]"), + "str", "r!0d, [r!1d, #!2E]", 1), ENCODING_MAP(ARMV5TE_STR_RRR, 0x5000, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP, - "str", "r!0d, [r!1d, r!2d]"), + "str", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(ARMV5TE_STR_SP_REL, 0x9000, 10, 8, 7, 0, -1, -1, IS_BINARY_OP, - "str", "r!0d, [sp, #!1E]"), + "str", "r!0d, [sp, #!1E]", 1), ENCODING_MAP(ARMV5TE_STRB_RRI5, 0x7000, 2, 0, 5, 3, 10, 6, IS_TERTIARY_OP, - "strb", "r!0d, [r!1d, #!2d]"), + "strb", "r!0d, [r!1d, #!2d]", 1), ENCODING_MAP(ARMV5TE_STRB_RRR, 0x5400, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP, - "strb", "r!0d, [r!1d, r!2d]"), + "strb", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(ARMV5TE_STRH_RRI5, 0x8000, 2, 0, 5, 3, 10, 6, IS_TERTIARY_OP, - "strh", "r!0d, [r!1d, #!2F]"), + "strh", "r!0d, [r!1d, #!2F]", 1), ENCODING_MAP(ARMV5TE_STRH_RRR, 0x5200, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP, - "strh", "r!0d, [r!1d, r!2d]"), + "strh", "r!0d, [r!1d, r!2d]", 1), ENCODING_MAP(ARMV5TE_SUB_RRI3, 0x1e00, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "sub", "r!0d, r!1d, #!2d]"), + "sub", "r!0d, r!1d, #!2d]", 1), ENCODING_MAP(ARMV5TE_SUB_RI8, 0x3800, 10, 8, 7, 0, -1, -1, IS_BINARY_OP | CLOBBER_DEST, - "sub", "r!0d, #!1d"), + "sub", "r!0d, #!1d", 1), ENCODING_MAP(ARMV5TE_SUB_RRR, 0x1a00, 2, 0, 5, 3, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, - "sub", "r!0d, r!1d, r!2d"), + "sub", "r!0d, r!1d, r!2d", 1), ENCODING_MAP(ARMV5TE_SUB_SPI7, 0xb080, 6, 0, -1, -1, -1, -1, IS_UNARY_OP | CLOBBER_DEST, - "sub", "sp, #!0d"), + "sub", "sp, 
#!0d", 1), ENCODING_MAP(ARMV5TE_SWI, 0xdf00, 7, 0, -1, -1, -1, -1, IS_UNARY_OP | IS_BRANCH, - "swi", "!0d"), + "swi", "!0d", 1), ENCODING_MAP(ARMV5TE_TST, 0x4200, 2, 0, 5, 3, -1, -1, IS_UNARY_OP, - "tst", "r!0d, r!1d"), + "tst", "r!0d, r!1d", 1), }; #define PADDING_MOV_R0_R0 0x1C00 @@ -441,7 +441,7 @@ static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr) * before sending them off to the assembler. If out-of-range branch distance is * seen rearrange the instructions a bit to correct it. */ -void dvmCompilerAssembleLIR(CompilationUnit *cUnit) +void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) { LIR *lir; Armv5teLIR *armLIR; @@ -450,6 +450,9 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit) ChainCellCounts chainCellCounts; int descSize = jitTraceDescriptionSize(cUnit->traceDesc); + info->codeAddress = NULL; + info->instructionSet = cUnit->instructionSet; + /* Beginning offset needs to allow space for chain cell offset */ for (armLIR = (Armv5teLIR *) cUnit->firstLIRInsn; armLIR; @@ -553,6 +556,13 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit) /* Flush dcache and invalidate the icache to maintain coherence */ cacheflush((long)cUnit->baseAddr, (long)((char *) cUnit->baseAddr + offset), 0); + + /* Record code entry point and instruction set */ + info->codeAddress = (char*)cUnit->baseAddr + cUnit->headerSize; + info->instructionSet = cUnit->instructionSet; + /* If applicable, mark low bit to denote thumb */ + if (info->instructionSet != DALVIK_JIT_ARM) + info->codeAddress = (char*)info->codeAddress + 1; } static u4 assembleBXPair(int branchOffset) @@ -795,3 +805,137 @@ void dvmJitUnchainAll() dvmUnlockMutex(&gDvmJit.tableLock); } } + +typedef struct jitProfileAddrToLine { + u4 lineNum; + u4 bytecodeOffset; +} jitProfileAddrToLine; + + +/* Callback function to track the bytecode offset/line number relationship */ +static int addrToLineCb (void *cnxt, u4 bytecodeOffset, u4 lineNum) +{ + jitProfileAddrToLine 
*addrToLine = (jitProfileAddrToLine *) cnxt; + + /* Best match so far for this offset */ + if (addrToLine->bytecodeOffset >= bytecodeOffset) { + addrToLine->lineNum = lineNum; + } + return 0; +} + +char *getTraceBase(const JitEntry *p) +{ + return (char*)p->codeAddress - + (6 + (p->u.info.instructionSet == DALVIK_JIT_ARM ? 0 : 1)); +} + +/* Dumps profile info for a single trace */ +static int dumpTraceProfile(JitEntry *p) +{ + ChainCellCounts* pCellCounts; + char* traceBase; + u4* pExecutionCount; + u2* pCellOffset; + JitTraceDescription *desc; + const Method* method; + + traceBase = getTraceBase(p); + + if (p->codeAddress == NULL) { + LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", (int)traceBase); + return 0; + } + + pExecutionCount = (u4*) (traceBase); + pCellOffset = (u2*) (traceBase + 4); + pCellCounts = (ChainCellCounts*) ((char *)pCellOffset + *pCellOffset); + desc = (JitTraceDescription*) ((char*)pCellCounts + sizeof(*pCellCounts)); + method = desc->method; + char *methodDesc = dexProtoCopyMethodDescriptor(&method->prototype); + jitProfileAddrToLine addrToLine = {0, desc->trace[0].frag.startOffset}; + + /* + * We may end up decoding the debug information for the same method + * multiple times, but the tradeoff is we don't need to allocate extra + * space to store the addr/line mapping. Since this is a debugging feature + * and done infrequently so the slower but simpler mechanism should work + * just fine. 
+ */ + dexDecodeDebugInfo(method->clazz->pDvmDex->pDexFile, + dvmGetMethodCode(method), + method->clazz->descriptor, + method->prototype.protoIdx, + method->accessFlags, + addrToLineCb, NULL, &addrToLine); + + LOGD("TRACEPROFILE 0x%08x % 10d [%#x(+%d), %d] %s%s;%s", + (int)traceBase, + *pExecutionCount, + desc->trace[0].frag.startOffset, + desc->trace[0].frag.numInsts, + addrToLine.lineNum, + method->clazz->descriptor, method->name, methodDesc); + free(methodDesc); + + return *pExecutionCount; +} + +/* Handy function to retrieve the profile count */ +static inline int getProfileCount(const JitEntry *entry) +{ + if (entry->dPC == 0 || entry->codeAddress == 0) + return 0; + u4 *pExecutionCount = (u4 *) getTraceBase(entry); + + return *pExecutionCount; +} + + +/* qsort callback function */ +static int sortTraceProfileCount(const void *entry1, const void *entry2) +{ + const JitEntry *jitEntry1 = entry1; + const JitEntry *jitEntry2 = entry2; + + int count1 = getProfileCount(jitEntry1); + int count2 = getProfileCount(jitEntry2); + return (count1 == count2) ? 0 : ((count1 > count2) ? 
-1 : 1); +} + +/* Sort the trace profile counts and dump them */ +void dvmCompilerSortAndPrintTraceProfiles() +{ + JitEntry *sortedEntries; + int numTraces = 0; + unsigned long counts = 0; + unsigned int i; + + /* Make sure that the table is not changing */ + dvmLockMutex(&gDvmJit.tableLock); + + /* Sort the entries by descending order */ + sortedEntries = malloc(sizeof(JitEntry) * gDvmJit.jitTableSize); + if (sortedEntries == NULL) + goto done; + memcpy(sortedEntries, gDvmJit.pJitEntryTable, + sizeof(JitEntry) * gDvmJit.jitTableSize); + qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry), + sortTraceProfileCount); + + /* Dump the sorted entries */ + for (i=0; i < gDvmJit.jitTableSize; i++) { + if (sortedEntries[i].dPC != 0) { + counts += dumpTraceProfile(&sortedEntries[i]); + numTraces++; + } + } + if (numTraces == 0) + numTraces = 1; + LOGD("JIT: Average execution count -> %d",(int)(counts / numTraces)); + + free(sortedEntries); +done: + dvmUnlockMutex(&gDvmJit.tableLock); + return; +} diff --git a/vm/compiler/codegen/armv5te/Codegen.c b/vm/compiler/codegen/armv5te/Codegen.c index 6d194725c..b8f4da4d2 100644 --- a/vm/compiler/codegen/armv5te/Codegen.c +++ b/vm/compiler/codegen/armv5te/Codegen.c @@ -3544,24 +3544,24 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) } /* Accept the work and start compiling */ -void *dvmCompilerDoWork(CompilerWorkOrder *work) +bool dvmCompilerDoWork(CompilerWorkOrder *work) { - void *res; + bool res; if (gDvmJit.codeCacheFull) { - return NULL; + return false; } switch (work->kind) { case kWorkOrderMethod: - res = dvmCompileMethod(work->info); + res = dvmCompileMethod(work->info, &work->result); break; case kWorkOrderTrace: /* Start compilation with maximally allowed trace length */ - res = dvmCompileTrace(work->info, JIT_MAX_TRACE_LEN); + res = dvmCompileTrace(work->info, JIT_MAX_TRACE_LEN, &work->result); break; default: - res = NULL; + res = false; dvmAbort(); } return res; diff --git 
a/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.c index 4c978979e..583cf00d2 100644 --- a/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.c +++ b/vm/compiler/codegen/armv5te/armv5te-vfp/ArchVariant.c @@ -22,6 +22,15 @@ #define USE_IN_CACHE_HANDLER 1 /* + * Determine the initial instruction set to be used for this trace. + * Later components may decide to change this. + */ +JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit) +{ + return DALVIK_JIT_THUMB; +} + +/* * Jump to the out-of-line handler in ARM mode to finish executing the * remaining of more complex instructions. */ diff --git a/vm/compiler/codegen/armv5te/armv5te/ArchVariant.c b/vm/compiler/codegen/armv5te/armv5te/ArchVariant.c index 3d9d0141c..d0122c89f 100644 --- a/vm/compiler/codegen/armv5te/armv5te/ArchVariant.c +++ b/vm/compiler/codegen/armv5te/armv5te/ArchVariant.c @@ -22,6 +22,15 @@ #define USE_IN_CACHE_HANDLER 1 /* + * Determine the initial instruction set to be used for this trace. + * Later components may decide to change this. + */ +JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit) +{ + return DALVIK_JIT_THUMB; +} + +/* * Jump to the out-of-line handler in ARM mode to finish executing the * remaining of more complex instructions. */ diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c index 2d6ac5adb..8a5843b44 100644 --- a/vm/interp/Jit.c +++ b/vm/interp/Jit.c @@ -43,12 +43,12 @@ int dvmJitStartup(void) dvmInitMutex(&gDvmJit.tableLock); if (res && gDvm.executionMode == kExecutionModeJit) { - struct JitEntry *pJitTable = NULL; + JitEntry *pJitTable = NULL; unsigned char *pJitProfTable = NULL; assert(gDvm.jitTableSize && !(gDvm.jitTableSize & (gDvmJit.jitTableSize - 1))); // Power of 2? 
dvmLockMutex(&gDvmJit.tableLock); - pJitTable = (struct JitEntry*) + pJitTable = (JitEntry*) calloc(gDvmJit.jitTableSize, sizeof(*pJitTable)); if (!pJitTable) { LOGE("jit table allocation failed\n"); @@ -71,10 +71,10 @@ int dvmJitStartup(void) } memset(pJitProfTable,0,JIT_PROF_SIZE); for (i=0; i < gDvmJit.jitTableSize; i++) { - pJitTable[i].chain = gDvmJit.jitTableSize; + pJitTable[i].u.info.chain = gDvmJit.jitTableSize; } /* Is chain field wide enough for termination pattern? */ - assert(pJitTable[0].chain == gDvm.maxJitTableEntries); + assert(pJitTable[0].u.info.chain == gDvm.maxJitTableEntries); done: gDvmJit.pJitEntryTable = pJitTable; @@ -126,145 +126,6 @@ void dvmBumpPunt(int from) } #endif -typedef struct jitProfileAddrToLine { - u4 lineNum; - u4 bytecodeOffset; -} jitProfileAddrToLine; - - -/* Callback function to track the bytecode offset/line number relationiship */ -static int addrToLineCb (void *cnxt, u4 bytecodeOffset, u4 lineNum) -{ - jitProfileAddrToLine *addrToLine = (jitProfileAddrToLine *) cnxt; - - /* Best match so far for this offset */ - if (addrToLine->bytecodeOffset >= bytecodeOffset) { - addrToLine->lineNum = lineNum; - } - return 0; -} - -/* Dumps profile info for a single trace */ -int dvmCompilerDumpTraceProfile(struct JitEntry *p) -{ - ChainCellCounts* pCellCounts; - char* traceBase; - u4* pExecutionCount; - u2* pCellOffset; - JitTraceDescription *desc; - const Method* method; - - /* - * The codeAddress field has the low bit set to mark thumb - * mode. We need to strip that off before reconstructing the - * trace data. See the diagram in Assemble.c for more info - * on the trace layout in memory. 
- */ - traceBase = (char*)p->codeAddress - 7; - - if (p->codeAddress == NULL) { - LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", (int)traceBase); - return 0; - } - - pExecutionCount = (u4*) (traceBase); - pCellOffset = (u2*) (traceBase + 4); - pCellCounts = (ChainCellCounts*) ((char *)pCellOffset + *pCellOffset); - desc = (JitTraceDescription*) ((char*)pCellCounts + sizeof(*pCellCounts)); - method = desc->method; - char *methodDesc = dexProtoCopyMethodDescriptor(&method->prototype); - jitProfileAddrToLine addrToLine = {0, desc->trace[0].frag.startOffset}; - - /* - * We may end up decoding the debug information for the same method - * multiple times, but the tradeoff is we don't need to allocate extra - * space to store the addr/line mapping. Since this is a debugging feature - * and done infrequently so the slower but simpler mechanism should work - * just fine. - */ - dexDecodeDebugInfo(method->clazz->pDvmDex->pDexFile, - dvmGetMethodCode(method), - method->clazz->descriptor, - method->prototype.protoIdx, - method->accessFlags, - addrToLineCb, NULL, &addrToLine); - - LOGD("TRACEPROFILE 0x%08x % 10d [%#x(+%d), %d] %s%s;%s", - (int)traceBase, - *pExecutionCount, - desc->trace[0].frag.startOffset, - desc->trace[0].frag.numInsts, - addrToLine.lineNum, - method->clazz->descriptor, method->name, methodDesc); - free(methodDesc); - - return *pExecutionCount; -} - -/* Handy function to retrieve the profile count */ -static inline int getProfileCount(const JitEntry *entry) -{ - if (entry->dPC == 0 || entry->codeAddress == 0) - return 0; - /* - * The codeAddress field has the low bit set to mark thumb - * mode. We need to strip that off before reconstructing the - * trace data. See the diagram in Assemble.c for more info - * on the trace layout in memory. 
- */ - u4 *pExecutionCount = (u4 *) ((char*)entry->codeAddress - 7); - - return *pExecutionCount; -} - -/* qsort callback function */ -static int sortTraceProfileCount(const void *entry1, const void *entry2) -{ - const JitEntry *jitEntry1 = entry1; - const JitEntry *jitEntry2 = entry2; - - int count1 = getProfileCount(jitEntry1); - int count2 = getProfileCount(jitEntry2); - return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1); -} - -/* Sort the trace profile counts and dump them */ -static void sortAndPrintTraceProfiles() -{ - JitEntry *sortedEntries; - int numTraces = 0; - unsigned long counts = 0; - unsigned int i; - - /* Make sure that the table is not changing */ - dvmLockMutex(&gDvmJit.tableLock); - - /* Sort the entries by descending order */ - sortedEntries = malloc(sizeof(JitEntry) * gDvmJit.jitTableSize); - if (sortedEntries == NULL) - goto done; - memcpy(sortedEntries, gDvmJit.pJitEntryTable, - sizeof(JitEntry) * gDvmJit.jitTableSize); - qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry), - sortTraceProfileCount); - - /* Dump the sorted entries */ - for (i=0; i < gDvmJit.jitTableSize; i++) { - if (sortedEntries[i].dPC != 0) { - counts += dvmCompilerDumpTraceProfile(&sortedEntries[i]); - numTraces++; - } - } - if (numTraces == 0) - numTraces = 1; - LOGD("JIT: Average execution count -> %d",(int)(counts / numTraces)); - - free(sortedEntries); -done: - dvmUnlockMutex(&gDvmJit.tableLock); - return; -} - /* Dumps debugging & tuning stats to the log */ void dvmJitStats() { @@ -280,7 +141,7 @@ void dvmJitStats() hit++; else not_hit++; - if (gDvmJit.pJitEntryTable[i].chain != gDvmJit.jitTableSize) + if (gDvmJit.pJitEntryTable[i].u.info.chain != gDvmJit.jitTableSize) chains++; } LOGD( @@ -301,11 +162,12 @@ void dvmJitStats() gDvmJit.invokeNative, gDvmJit.returnOp); #endif if (gDvmJit.profile) { - sortAndPrintTraceProfiles(); + dvmCompilerSortAndPrintTraceProfiles(); } } } + /* * Final JIT shutdown. 
Only do this once, and do not attempt to restart * the JIT later. @@ -466,7 +328,7 @@ int dvmCheckJit(const u2* pc, Thread* self, InterpState* interpState) return switchInterp; } -static inline struct JitEntry *findJitEntry(const u2* pc) +static inline JitEntry *findJitEntry(const u2* pc) { int idx = dvmJitHash(pc); @@ -475,8 +337,8 @@ static inline struct JitEntry *findJitEntry(const u2* pc) return &gDvmJit.pJitEntryTable[idx]; else { int chainEndMarker = gDvmJit.jitTableSize; - while (gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) { - idx = gDvmJit.pJitEntryTable[idx].chain; + while (gDvmJit.pJitEntryTable[idx].u.info.chain != chainEndMarker) { + idx = gDvmJit.pJitEntryTable[idx].u.info.chain; if (gDvmJit.pJitEntryTable[idx].dPC == pc) return &gDvmJit.pJitEntryTable[idx]; } @@ -484,47 +346,12 @@ static inline struct JitEntry *findJitEntry(const u2* pc) return NULL; } -struct JitEntry *dvmFindJitEntry(const u2* pc) +JitEntry *dvmFindJitEntry(const u2* pc) { return findJitEntry(pc); } /* - * Allocate an entry in a JitTable. Assumes caller holds lock, if - * applicable. Normally used for table resizing. Will complain (die) - * if entry already exists in the table or if table is full. 
- */ -static struct JitEntry *allocateJitEntry(const u2* pc, struct JitEntry *table, - u4 size) -{ - struct JitEntry *p; - unsigned int idx; - unsigned int prev; - idx = dvmJitHashMask(pc, size-1); - while ((table[idx].chain != size) && (table[idx].dPC != pc)) { - idx = table[idx].chain; - } - assert(table[idx].dPC != pc); /* Already there */ - if (table[idx].dPC == NULL) { - /* use this slot */ - return &table[idx]; - } - /* Find a free entry and chain it in */ - prev = idx; - while (true) { - idx++; - if (idx == size) - idx = 0; /* Wraparound */ - if ((table[idx].dPC == NULL) || (idx == prev)) - break; - } - assert(idx != prev); - table[prev].chain = idx; - assert(table[idx].dPC == NULL); - return &table[idx]; -} - -/* * If a translated code address exists for the davik byte code * pointer return it. This routine needs to be fast. */ @@ -545,8 +372,8 @@ void* dvmJitGetCodeAddr(const u2* dPC) return gDvmJit.pJitEntryTable[idx].codeAddress; } else { int chainEndMarker = gDvmJit.jitTableSize; - while (gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) { - idx = gDvmJit.pJitEntryTable[idx].chain; + while (gDvmJit.pJitEntryTable[idx].u.info.chain != chainEndMarker) { + idx = gDvmJit.pJitEntryTable[idx].u.info.chain; if (gDvmJit.pJitEntryTable[idx].dPC == dPC) { #if defined(EXIT_STATS) gDvmJit.addrLookupsFound++; @@ -562,16 +389,108 @@ void* dvmJitGetCodeAddr(const u2* dPC) } /* + * Find an entry in the JitTable, creating if necessary. + * Returns null if table is full. + */ +JitEntry *dvmJitLookupAndAdd(const u2* dPC) +{ + u4 chainEndMarker = gDvmJit.jitTableSize; + u4 idx = dvmJitHash(dPC); + + /* Walk the bucket chain to find an exact match for our PC */ + while ((gDvmJit.pJitEntryTable[idx].u.info.chain != chainEndMarker) && + (gDvmJit.pJitEntryTable[idx].dPC != dPC)) { + idx = gDvmJit.pJitEntryTable[idx].u.info.chain; + } + + if (gDvmJit.pJitEntryTable[idx].dPC != dPC) { + /* + * No match. Aquire jitTableLock and find the last + * slot in the chain. 
Possibly continue the chain walk in case + * some other thread allocated the slot we were looking + * at previuosly (perhaps even the dPC we're trying to enter). + */ + dvmLockMutex(&gDvmJit.tableLock); + /* + * At this point, if .dPC is NULL, then the slot we're + * looking at is the target slot from the primary hash + * (the simple, and common case). Otherwise we're going + * to have to find a free slot and chain it. + */ + MEM_BARRIER(); /* Make sure we reload [].dPC after lock */ + if (gDvmJit.pJitEntryTable[idx].dPC != NULL) { + u4 prev; + while (gDvmJit.pJitEntryTable[idx].u.info.chain != chainEndMarker) { + if (gDvmJit.pJitEntryTable[idx].dPC == dPC) { + /* Another thread got there first for this dPC */ + dvmUnlockMutex(&gDvmJit.tableLock); + return &gDvmJit.pJitEntryTable[idx]; + } + idx = gDvmJit.pJitEntryTable[idx].u.info.chain; + } + /* Here, idx should be pointing to the last cell of an + * active chain whose last member contains a valid dPC */ + assert(gDvmJit.pJitEntryTable[idx].dPC != NULL); + /* Linear walk to find a free cell and add it to the end */ + prev = idx; + while (true) { + idx++; + if (idx == chainEndMarker) + idx = 0; /* Wraparound */ + if ((gDvmJit.pJitEntryTable[idx].dPC == NULL) || + (idx == prev)) + break; + } + if (idx != prev) { + JitEntryInfoUnion oldValue; + JitEntryInfoUnion newValue; + /* + * Although we hold the lock so that noone else will + * be trying to update a chain field, the other fields + * packed into the word may be in use by other threads. 
+ */ + do { + oldValue = gDvmJit.pJitEntryTable[prev].u; + newValue = oldValue; + newValue.info.chain = idx; + } while (!ATOMIC_CMP_SWAP( + &gDvmJit.pJitEntryTable[prev].u.infoWord, + oldValue.infoWord, newValue.infoWord)); + } + } + if (gDvmJit.pJitEntryTable[idx].dPC == NULL) { + /* Allocate the slot */ + gDvmJit.pJitEntryTable[idx].dPC = dPC; + gDvmJit.jitTableEntriesUsed++; + } else { + /* Table is full */ + idx = chainEndMarker; + } + dvmUnlockMutex(&gDvmJit.tableLock); + } + return (idx == chainEndMarker) ? NULL : &gDvmJit.pJitEntryTable[idx]; +} +/* * Register the translated code pointer into the JitTable. * NOTE: Once a codeAddress field transitions from NULL to * JIT'd code, it must not be altered without first halting all - * threads. + * threads. This routine should only be called by the compiler + * thread. */ -void dvmJitSetCodeAddr(const u2* dPC, void *nPC) { - struct JitEntry *jitEntry = findJitEntry(dPC); +void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set) { + JitEntryInfoUnion oldValue; + JitEntryInfoUnion newValue; + JitEntry *jitEntry = dvmJitLookupAndAdd(dPC); assert(jitEntry); - /* Thumb code has odd PC */ - jitEntry->codeAddress = (void *) ((intptr_t) nPC |1); + /* Note: order of update is important */ + do { + oldValue = jitEntry->u; + newValue = oldValue; + newValue.info.instructionSet = set; + } while (!ATOMIC_CMP_SWAP( + &jitEntry->u.infoWord, + oldValue.infoWord, newValue.infoWord)); + jitEntry->codeAddress = nPC; } /* @@ -581,7 +500,6 @@ void dvmJitSetCodeAddr(const u2* dPC, void *nPC) { * requested */ -#define PROFILE_STALENESS_THRESHOLD 100000LL bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState) { bool res = false; /* Assume success */ @@ -616,78 +534,30 @@ bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState) interpState->jitState = kJitNormal; } } else if (interpState->jitState == kJitTSelectRequest) { - u4 chainEndMarker = gDvmJit.jitTableSize; - u4 idx = 
dvmJitHash(interpState->pc); - - /* Walk the bucket chain to find an exact match for our PC */ - while ((gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) && - (gDvmJit.pJitEntryTable[idx].dPC != interpState->pc)) { - idx = gDvmJit.pJitEntryTable[idx].chain; - } - - if (gDvmJit.pJitEntryTable[idx].dPC == interpState->pc) { + JitEntry *slot = dvmJitLookupAndAdd(interpState->pc); + if (slot == NULL) { /* - * Got a match. This means a trace has already - * been requested for this address. Bail back to - * mterp, which will check if the translation is ready - * for execution + * Table is full. This should have been + * detected by the compiler thread and the table + * resized before we run into it here. Assume bad things + * are afoot and disable profiling. */ interpState->jitState = kJitTSelectAbort; + LOGD("JIT: JitTable full, disabling profiling"); + dvmJitStopTranslationRequests(); + } else if (slot->u.info.traceRequested) { + /* Trace already requested - revert to interpreter */ + interpState->jitState = kJitTSelectAbort; } else { - /* - * No match. Aquire jitTableLock and find the last - * slot in the chain. Possibly continue the chain walk in case - * some other thread allocated the slot we were looking - * at previuosly - */ - dvmLockMutex(&gDvmJit.tableLock); - /* - * At this point, if .dPC is NULL, then the slot we're - * looking at is the target slot from the primary hash - * (the simple, and expected case). Otherwise we're going - * to have to find a free slot and chain it. 
- */ - MEM_BARRIER(); - if (gDvmJit.pJitEntryTable[idx].dPC != NULL) { - u4 prev; - while (gDvmJit.pJitEntryTable[idx].chain != chainEndMarker) { - idx = gDvmJit.pJitEntryTable[idx].chain; - } - /* Here, idx should be pointing to the last cell of an - * active chain whose last member contains a valid dPC */ - assert(gDvmJit.pJitEntryTable[idx].dPC != NULL); - /* Now, do a linear walk to find a free cell and add it to - * end of this chain */ - prev = idx; - while (true) { - idx++; - if (idx == chainEndMarker) - idx = 0; /* Wraparound */ - if ((gDvmJit.pJitEntryTable[idx].dPC == NULL) || - (idx == prev)) - break; - } - if (idx != prev) { - /* Got it - chain */ - gDvmJit.pJitEntryTable[prev].chain = idx; - } - } - if (gDvmJit.pJitEntryTable[idx].dPC == NULL) { - /* Allocate the slot */ - gDvmJit.pJitEntryTable[idx].dPC = interpState->pc; - gDvmJit.jitTableEntriesUsed++; - } else { - /* - * Table is full. We could resize it, but that would - * be better handled by the translator thread. It - * will be aware of how full the table is getting. - * Disable further profiling and continue. 
- */ - interpState->jitState = kJitTSelectAbort; - LOGD("JIT: JitTable full, disabling profiling"); - dvmJitStopTranslationRequests(); - } - dvmUnlockMutex(&gDvmJit.tableLock); + /* Mark request */ + JitEntryInfoUnion oldValue; + JitEntryInfoUnion newValue; + do { + oldValue = slot->u; + newValue = oldValue; + newValue.info.traceRequested = true; + } while (!ATOMIC_CMP_SWAP( &slot->u.infoWord, + oldValue.infoWord, newValue.infoWord)); } } switch (interpState->jitState) { @@ -725,8 +595,10 @@ bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState) */ bool dvmJitResizeJitTable( unsigned int size ) { - struct JitEntry *pNewTable; + JitEntry *pNewTable; + JitEntry *pOldTable; u4 newMask; + unsigned int oldSize; unsigned int i; assert(gDvm.pJitEntryTable != NULL); @@ -740,42 +612,51 @@ bool dvmJitResizeJitTable( unsigned int size ) return true; } - pNewTable = (struct JitEntry*)calloc(size, sizeof(*pNewTable)); + pNewTable = (JitEntry*)calloc(size, sizeof(*pNewTable)); if (pNewTable == NULL) { return true; } for (i=0; i< size; i++) { - pNewTable[i].chain = size; /* Initialize chain termination */ + pNewTable[i].u.info.chain = size; /* Initialize chain termination */ } /* Stop all other interpreting/jit'ng threads */ dvmSuspendAllThreads(SUSPEND_FOR_JIT); - /* - * At this point, only the compiler thread may be in contention - * for the jitEntryTable (it is not affected by the thread suspension). - * Aquire the lock. 
- */ + pOldTable = gDvmJit.pJitEntryTable; + oldSize = gDvmJit.jitTableSize; dvmLockMutex(&gDvmJit.tableLock); - - for (i=0; i < gDvmJit.jitTableSize; i++) { - if (gDvmJit.pJitEntryTable[i].dPC) { - struct JitEntry *p; - p = allocateJitEntry(gDvmJit.pJitEntryTable[i].dPC, - pNewTable, size); - p->dPC = gDvmJit.pJitEntryTable[i].dPC; - p->codeAddress = gDvmJit.pJitEntryTable[i].codeAddress; - } - } - - free(gDvmJit.pJitEntryTable); gDvmJit.pJitEntryTable = pNewTable; gDvmJit.jitTableSize = size; gDvmJit.jitTableMask = size - 1; - + gDvmJit.jitTableEntriesUsed = 0; dvmUnlockMutex(&gDvmJit.tableLock); + for (i=0; i < oldSize; i++) { + if (pOldTable[i].dPC) { + JitEntry *p; + u2 chain; + p = dvmJitLookupAndAdd(pOldTable[i].dPC); + p->dPC = pOldTable[i].dPC; + /* + * Compiler thread may have just updated the new entry's + * code address field, so don't blindly copy null. + */ + if (pOldTable[i].codeAddress != NULL) { + p->codeAddress = pOldTable[i].codeAddress; + } + /* We need to preserve the new chain field, but copy the rest */ + dvmLockMutex(&gDvmJit.tableLock); + chain = p->u.info.chain; + p->u = pOldTable[i].u; + p->u.info.chain = chain; + dvmUnlockMutex(&gDvmJit.tableLock); + } + } + + free(pOldTable); + /* Restart the world */ dvmResumeAllThreads(SUSPEND_FOR_JIT); diff --git a/vm/interp/Jit.h b/vm/interp/Jit.h index 31454e449..660b5ecf6 100644 --- a/vm/interp/Jit.h +++ b/vm/interp/Jit.h @@ -37,16 +37,29 @@ static inline u4 dvmJitHash( const u2* p ) { return dvmJitHashMask( p, gDvmJit.jitTableMask ); } - - /* * Entries in the JIT's address lookup hash table. - * with assembly hash function in mterp. - * TODO: rework this structure now that the profile counts have - * moved into their own table. + * Fields which may be updated by multiple threads packed into a + * single 32-bit word to allow use of atomic update. 
*/ + +typedef struct JitEntryInfo { + unsigned int traceRequested:1; /* already requested a translation */ + unsigned int isMethodEntry:1; + unsigned int inlineCandidate:1; + unsigned int profileEnabled:1; + JitInstructionSetType instructionSet:4; + unsigned int unused:8; + u2 chain; /* Index of next in chain */ +} JitEntryInfo; + +typedef union JitEntryInfoUnion { + JitEntryInfo info; + volatile int infoWord; +} JitEntryInfoUnion; + typedef struct JitEntry { - u2 unused; /* was execution count */ + JitEntryInfoUnion u; u2 chain; /* Index of next in chain */ const u2* dPC; /* Dalvik code address */ void* codeAddress; /* Code address of native translation */ @@ -56,15 +69,14 @@ int dvmJitStartup(void); void dvmJitShutdown(void); int dvmCheckJit(const u2* pc, Thread* self, InterpState* interpState); void* dvmJitGetCodeAddr(const u2* dPC); -void dvmJitSetCodeAddr(const u2* dPC, void *nPC); bool dvmJitCheckTraceRequest(Thread* self, InterpState* interpState); -void* dvmJitChain(void* tgtAddr, u4* branchAddr); void dvmJitStopTranslationRequests(void); void dvmJitStats(void); bool dvmJitResizeJitTable(unsigned int size); struct JitEntry *dvmFindJitEntry(const u2* pc); s8 dvmJitd2l(double d); s8 dvmJitf2l(float f); +void dvmJitSetCodeAddr(const u2* dPC, void *nPC, JitInstructionSetType set); #endif /*_DALVIK_INTERP_JIT*/ |