Diffstat (limited to 'vm/compiler/codegen/x86/CodegenInterface.cpp')
| -rw-r--r-- | vm/compiler/codegen/x86/CodegenInterface.cpp | 1532 |
1 file changed, 1532 insertions, 0 deletions
diff --git a/vm/compiler/codegen/x86/CodegenInterface.cpp b/vm/compiler/codegen/x86/CodegenInterface.cpp new file mode 100644 index 000000000..aade180e0 --- /dev/null +++ b/vm/compiler/codegen/x86/CodegenInterface.cpp @@ -0,0 +1,1532 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <sys/mman.h> +#include "Dalvik.h" +#include "libdex/DexOpcodes.h" +#include "compiler/Compiler.h" +#include "compiler/CompilerIR.h" +#include "interp/Jit.h" +#include "libdex/DexFile.h" +#include "Lower.h" +#include "NcgAot.h" +#include "compiler/codegen/CompilerCodegen.h" + +/* Init values when a predicted chain is initially assembled */ +/* E7FE is branch to self */ +#define PREDICTED_CHAIN_BX_PAIR_INIT 0xe7fe + +/* Target-specific save/restore */ +extern "C" void dvmJitCalleeSave(double *saveArea); +extern "C" void dvmJitCalleeRestore(double *saveArea); + +/* + * Determine the initial instruction set to be used for this trace. + * Later components may decide to change this. + */ +//JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit) +JitInstructionSetType dvmCompilerInstructionSet(void) +{ + return DALVIK_JIT_IA32; +} + +JitInstructionSetType dvmCompilerGetInterpretTemplateSet() +{ + return DALVIK_JIT_IA32; +} + +/* we don't use template for IA32 */ +void *dvmCompilerGetInterpretTemplate() +{ + return NULL; +} + +/* Track the number of times that the code cache is patched */ +#if defined(WITH_JIT_TUNING) +#define UPDATE_CODE_CACHE_PATCHES() (gDvmJit.codeCachePatches++) +#else +#define UPDATE_CODE_CACHE_PATCHES() +#endif + +bool dvmCompilerArchInit() { + /* Target-specific configuration */ + gDvmJit.jitTableSize = 1 << 12; + gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1; + gDvmJit.threshold = 255; + gDvmJit.codeCacheSize = 512*1024; + gDvmJit.optLevel = kJitOptLevelO1; + +#if defined(WITH_SELF_VERIFICATION) + /* Force into blocking mode */ + gDvmJit.blockingMode = true; + gDvm.nativeDebuggerActive = true; +#endif + + // Make sure all threads have current values + dvmJitUpdateThreadStateAll(); + + return true; +} + +void dvmCompilerPatchInlineCache(void) +{ + int i; + PredictedChainingCell *minAddr, *maxAddr; + + /* Nothing to be done */ + if (gDvmJit.compilerICPatchIndex == 0) return; + + /* + * Since all threads are already stopped we don't really need to acquire + * the lock. But race condition can be easily introduced in the future w/o + * paying attention so we still acquire the lock here. 
+ */ + dvmLockMutex(&gDvmJit.compilerICPatchLock); + + UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); + + //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex); + + /* Initialize the min/max address range */ + minAddr = (PredictedChainingCell *) + ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize); + maxAddr = (PredictedChainingCell *) gDvmJit.codeCache; + + for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) { + ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i]; + PredictedChainingCell *cellAddr = workOrder->cellAddr; + PredictedChainingCell *cellContent = &workOrder->cellContent; + ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor, + workOrder->classLoader); + + assert(clazz->serialNumber == workOrder->serialNumber); + + /* Use the newly resolved clazz pointer */ + cellContent->clazz = clazz; + + if (cellAddr->clazz == NULL) { + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized", + cellAddr, + cellContent->clazz->descriptor, + cellContent->method->name)); + } else { + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) " + "patched", + cellAddr, + cellAddr->clazz->descriptor, + cellContent->clazz->descriptor, + cellContent->method->name)); + } + + /* Patch the chaining cell */ + *cellAddr = *cellContent; + minAddr = (cellAddr < minAddr) ? cellAddr : minAddr; + maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr; + } + + PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); + + gDvmJit.compilerICPatchIndex = 0; + dvmUnlockMutex(&gDvmJit.compilerICPatchLock); +} + +/* Target-specific cache clearing */ +void dvmCompilerCacheClear(char *start, size_t size) +{ + /* "0xFF 0xFF" is an invalid opcode for x86. */ + memset(start, 0xFF, size); +} + +/* for JIT debugging, to be implemented */ +void dvmJitCalleeSave(double *saveArea) { +} + +void dvmJitCalleeRestore(double *saveArea) { +} + +void dvmJitToInterpSingleStep() { +} + +JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc, + const JitEntry *knownEntry) { + return NULL; +} + +void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c +{ +} + +void dvmCompilerArchDump(void) +{ +} + +char *getTraceBase(const JitEntry *p) +{ + return NULL; +} + +void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info) +{ +} + +void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress) +{ +} + +void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit) +{ + ALOGE("Method-based JIT not supported for the x86 target"); + dvmAbort(); +} + +void dvmJitScanAllClassPointers(void (*callback)(void *)) +{ +} + +/* Handy function to retrieve the profile count */ +static inline int getProfileCount(const JitEntry *entry) +{ + if (entry->dPC == 0 || entry->codeAddress == 0) + return 0; + u4 *pExecutionCount = (u4 *) getTraceBase(entry); + + return pExecutionCount ? *pExecutionCount : 0; +} + +/* qsort callback function */ +static int sortTraceProfileCount(const void *entry1, const void *entry2) +{ + const JitEntry *jitEntry1 = (const JitEntry *)entry1; + const JitEntry *jitEntry2 = (const JitEntry *)entry2; + + JitTraceCounter_t count1 = getProfileCount(jitEntry1); + JitTraceCounter_t count2 = getProfileCount(jitEntry2); + return (count1 == count2) ? 0 : ((count1 > count2) ? 
-1 : 1); +} + +/* Sort the trace profile counts and dump them */ +void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c +{ + JitEntry *sortedEntries; + int numTraces = 0; + unsigned long counts = 0; + unsigned int i; + + /* Make sure that the table is not changing */ + dvmLockMutex(&gDvmJit.tableLock); + + /* Sort the entries by descending order */ + sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize); + if (sortedEntries == NULL) + goto done; + memcpy(sortedEntries, gDvmJit.pJitEntryTable, + sizeof(JitEntry) * gDvmJit.jitTableSize); + qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry), + sortTraceProfileCount); + + /* Dump the sorted entries */ + for (i=0; i < gDvmJit.jitTableSize; i++) { + if (sortedEntries[i].dPC != 0) { + numTraces++; + } + } + if (numTraces == 0) + numTraces = 1; + ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces)); + + free(sortedEntries); +done: + dvmUnlockMutex(&gDvmJit.tableLock); + return; +} + +void jumpWithRelOffset(char* instAddr, int relOffset) { + stream = instAddr; + OpndSize immSize = estOpndSizeFromImm(relOffset); + relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond); + dump_imm(Mnemonic_JMP, immSize, relOffset); +} + +// works whether instructions for target basic block are generated or not +LowOp* jumpToBasicBlock(char* instAddr, int targetId) { + stream = instAddr; + bool unknown; + OpndSize size; + int relativeNCG = targetId; + relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size); + unconditional_jump_int(relativeNCG, size); + return NULL; +} + +LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) { + stream = instAddr; + bool unknown; + OpndSize size; + int relativeNCG = targetId; + relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size); + conditional_jump_int(cc, relativeNCG, size); + return NULL; +} + +/* + * Attempt to enqueue a work order to patch an inline cache for a predicted + * chaining cell for virtual/interface calls. + */ +static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr, + PredictedChainingCell *newContent) +{ + bool result = true; + + /* + * Make sure only one thread gets here since updating the cell (ie fast + * path and queueing the request (ie the queued path) have to be done + * in an atomic fashion. + */ + dvmLockMutex(&gDvmJit.compilerICPatchLock); + + /* Fast path for uninitialized chaining cell */ + if (cellAddr->clazz == NULL && + cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) { + UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + + cellAddr->method = newContent->method; + cellAddr->branch = newContent->branch; + cellAddr->branch2 = newContent->branch2; + + /* + * The update order matters - make sure clazz is updated last since it + * will bring the uninitialized chaining cell to life. 
+ */ + android_atomic_release_store((int32_t)newContent->clazz, + (volatile int32_t *)(void*) &cellAddr->clazz); + //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0); + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + +#if 0 + MEM_BARRIER(); + cellAddr->clazz = newContent->clazz; + //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0); +#endif +#if defined(IA_JIT_TUNING) + gDvmJit.icPatchInit++; +#endif + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p", + cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method)); + /* Check if this is a frequently missed clazz */ + } else if (cellAddr->stagedClazz != newContent->clazz) { + /* Not proven to be frequent yet - build up the filter cache */ + UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + + cellAddr->stagedClazz = newContent->clazz; + + UPDATE_CODE_CACHE_PATCHES(); + PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + +#if defined(WITH_JIT_TUNING) + gDvmJit.icPatchRejected++; +#endif + /* + * Different classes but same method implementation - it is safe to just + * patch the class value without the need to stop the world. + */ + } else if (cellAddr->method == newContent->method) { + UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + + cellAddr->clazz = newContent->clazz; + /* No need to flush the cache here since the branch is not patched */ + UPDATE_CODE_CACHE_PATCHES(); + + PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); + +#if defined(WITH_JIT_TUNING) + gDvmJit.icPatchLockFree++; +#endif + /* + * Cannot patch the chaining cell inline - queue it until the next safe + * point. + */ + } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) { + int index = gDvmJit.compilerICPatchIndex++; + const ClassObject *clazz = newContent->clazz; + + gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr; + gDvmJit.compilerICPatchQueue[index].cellContent = *newContent; + gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor; + gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader; + /* For verification purpose only */ + gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber; + +#if defined(WITH_JIT_TUNING) + gDvmJit.icPatchQueued++; +#endif + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s", + cellAddr, newContent->clazz->descriptor, newContent->method->name)); + } else { + /* Queue is full - just drop this patch request */ +#if defined(WITH_JIT_TUNING) + gDvmJit.icPatchDropped++; +#endif + + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s", + cellAddr, newContent->clazz->descriptor, newContent->method->name)); + } + + dvmUnlockMutex(&gDvmJit.compilerICPatchLock); + return result; +} + +/* + * This method is called from the invoke templates for virtual and interface + * methods to speculatively setup a chain to the callee. The templates are + * written in assembly and have setup method, cell, and clazz at r0, r2, and + * r3 respectively, so there is a unused argument in the list. Upon return one + * of the following three results may happen: + * 1) Chain is not setup because the callee is native. Reset the rechain + * count to a big number so that it will take a long time before the next + * rechain attempt to happen. + * 2) Chain is not setup because the callee has not been created yet. Reset + * the rechain count to a small number and retry in the near future. 
+ * 3) Ask all other threads to stop before patching this chaining cell. + * This is required because another thread may have passed the class check + * but hasn't reached the chaining cell yet to follow the chain. If we + * patch the content before halting the other thread, there could be a + * small window for race conditions to happen that it may follow the new + * but wrong chain to invoke a different method. + */ +const Method *dvmJitToPatchPredictedChain(const Method *method, + Thread *self, + PredictedChainingCell *cell, + const ClassObject *clazz) +{ + int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN; + /* Don't come back here for a long time if the method is native */ + if (dvmIsNativeMethod(method)) { + UNPROTECT_CODE_CACHE(cell, sizeof(*cell)); + + /* + * Put a non-zero/bogus value in the clazz field so that it won't + * trigger immediate patching and will continue to fail to match with + * a real clazz pointer. + */ + cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ; + + UPDATE_CODE_CACHE_PATCHES(); + PROTECT_CODE_CACHE(cell, sizeof(*cell)); + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: predicted chain %p to native method %s ignored", + cell, method->name)); + goto done; + } + { + int tgtAddr = (int) dvmJitGetTraceAddr(method->insns); + + /* + * Compilation not made yet for the callee. Reset the counter to a small + * value and come back to check soon. + */ + if ((tgtAddr == 0) || + ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) { + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed", + cell, method->clazz->descriptor, method->name)); + goto done; + } + + PredictedChainingCell newCell; + + if (cell->clazz == NULL) { + newRechainCount = self->icRechainCount; + } + + int relOffset = (int) tgtAddr - (int)cell; + OpndSize immSize = estOpndSizeFromImm(relOffset); + int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond); + relOffset -= jumpSize; + COMPILER_TRACE_CHAINING( + ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d", + cell, method->clazz->descriptor, method->name, jumpSize)); + //can't use stream here since it is used by the compilation thread + dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch + + newCell.clazz = clazz; + newCell.method = method; + + /* + * Enter the work order to the queue and the chaining cell will be patched + * the next time a safe point is entered. + * + * If the enqueuing fails reset the rechain count to a normal value so that + * it won't get indefinitely delayed. + */ + inlineCachePatchEnqueue(cell, &newCell); + } +done: + self->icRechainCount = newRechainCount; + return method; +} + +/* + * Unchain a trace given the starting address of the translation + * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR. + * For ARM, it returns the address following the last cell unchained. + * For IA, it returns NULL since cacheflush is not required for IA. 
+ */ +u4* dvmJitUnchain(void* codeAddr) +{ + /* codeAddr is 4-byte aligned, so is chain cell count offset */ + u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4); + u2 chainCellCountOffset = *pChainCellCountOffset; + /* chain cell counts information is 4-byte aligned */ + ChainCellCounts *pChainCellCounts = + (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset); + u2* pChainCellOffset = (u2*)((char*)codeAddr - 2); + u2 chainCellOffset = *pChainCellOffset; + u1* pChainCells; + int i,j; + PredictedChainingCell *predChainCell; + int padding; + + /* Locate the beginning of the chain cell region */ + pChainCells = (u1 *)((char*)codeAddr + chainCellOffset); + + /* The cells are sorted in order - walk through them and reset */ + for (i = 0; i < kChainingCellGap; i++) { + /* for hot, normal, singleton chaining: + nop //padding. + jmp 0 + mov imm32, reg1 + mov imm32, reg2 + call reg2 + after chaining: + nop + jmp imm + mov imm32, reg1 + mov imm32, reg2 + call reg2 + after unchaining: + nop + jmp 0 + mov imm32, reg1 + mov imm32, reg2 + call reg2 + Space occupied by the chaining cell in bytes: nop is for padding, + jump 0, the target 0 is 4 bytes aligned. + Space for predicted chaining: 5 words = 20 bytes + */ + int elemSize = 0; + if (i == kChainingCellInvokePredicted) { + elemSize = 20; + } + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i])); + + for (j = 0; j < pChainCellCounts->u.count[i]; j++) { + switch(i) { + case kChainingCellNormal: + case kChainingCellHot: + case kChainingCellInvokeSingleton: + case kChainingCellBackwardBranch: + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: unchaining of normal, hot, or singleton")); + pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03)); + elemSize = 4+5+5+2; + memset(pChainCells, 0, 4); + break; + case kChainingCellInvokePredicted: + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: unchaining of predicted")); + /* 4-byte aligned */ + padding = (4 - ((u4)pChainCells & 3)) & 3; + pChainCells += padding; + predChainCell = (PredictedChainingCell *) pChainCells; + /* + * There could be a race on another mutator thread to use + * this particular predicted cell and the check has passed + * the clazz comparison. So we cannot safely wipe the + * method and branch but it is safe to clear the clazz, + * which serves as the key. + */ + predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT; + break; + default: + ALOGE("Unexpected chaining type: %d", i); + dvmAbort(); // dvmAbort OK here - can't safely recover + } + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells)); + pChainCells += elemSize; /* Advance by a fixed number of bytes */ + } + } + return NULL; +} + +/* Unchain all translation in the cache. 
*/ +void dvmJitUnchainAll() +{ + ALOGV("Jit Runtime: unchaining all"); + if (gDvmJit.pJitEntryTable != NULL) { + COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all")); + dvmLockMutex(&gDvmJit.tableLock); + + UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); + + for (size_t i = 0; i < gDvmJit.jitTableSize; i++) { + if (gDvmJit.pJitEntryTable[i].dPC && + !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry && + gDvmJit.pJitEntryTable[i].codeAddress) { + dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress); + } + } + + PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); + + dvmUnlockMutex(&gDvmJit.tableLock); + gDvmJit.translationChains = 0; + } + gDvmJit.hasNewChain = false; +} + +#define P_GPR_1 PhysicalReg_EBX +/* Add an additional jump instruction, keep jump target 4 bytes aligned.*/ +static void insertJumpHelp() +{ + int rem = (uint)stream % 4; + int nop_size = 3 - rem; + dump_nop(nop_size); + unconditional_jump_int(0, OpndSize_32); + return; +} + +/* Chaining cell for code that may need warmup. */ +/* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?) + blx r0 + data 0xb23a //bytecode address: 0x5115b23a + data 0x5115 + IA32 assembly: + jmp 0 //5 bytes + movl address, %ebx + movl dvmJitToInterpNormal, %eax + call %eax + <-- return address +*/ +static void handleNormalChainingCell(CompilationUnit *cUnit, + unsigned int offset, int blockId, LowOpBlockLabel* labelList) +{ + ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x", + cUnit->method->name, blockId, offset, stream - streamMethodStart); + if(dump_x86_inst) + ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p", + offset, stream - streamMethodStart, stream); + /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps + * reslove the multithreading issue. + */ + insertJumpHelp(); + move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); + scratchRegs[0] = PhysicalReg_EAX; + call_dvmJitToInterpNormal(); + //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */ +} + +/* + * Chaining cell for instructions that immediately following already translated + * code. + */ +static void handleHotChainingCell(CompilationUnit *cUnit, + unsigned int offset, int blockId, LowOpBlockLabel* labelList) +{ + ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x", + cUnit->method->name, blockId, offset, stream - streamMethodStart); + if(dump_x86_inst) + ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p", + offset, stream - streamMethodStart, stream); + /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps + * reslove the multithreading issue. 
+ */ + insertJumpHelp(); + move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); + scratchRegs[0] = PhysicalReg_EAX; + call_dvmJitToInterpTraceSelect(); + //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */ +} + +/* Chaining cell for branches that branch back into the same basic block */ +static void handleBackwardBranchChainingCell(CompilationUnit *cUnit, + unsigned int offset, int blockId, LowOpBlockLabel* labelList) +{ + ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x", + cUnit->method->name, blockId, offset, stream - streamMethodStart); + if(dump_x86_inst) + ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p", + offset, stream - streamMethodStart, stream); + /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps + * reslove the multithreading issue. + */ + insertJumpHelp(); + move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); + scratchRegs[0] = PhysicalReg_EAX; + call_dvmJitToInterpNormal(); + //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */ +} + +/* Chaining cell for monomorphic method invocations. */ +static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit, + const Method *callee, int blockId, LowOpBlockLabel* labelList) +{ + ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x", + cUnit->method->name, blockId, callee->name, stream - streamMethodStart); + if(dump_x86_inst) + ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p", + blockId, stream - streamMethodStart, stream); + /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps + * reslove the multithreading issue. + */ + insertJumpHelp(); + move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); + scratchRegs[0] = PhysicalReg_EAX; + call_dvmJitToInterpTraceSelect(); + //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */ +} +#undef P_GPR_1 + +/* Chaining cell for monomorphic method invocations. */ +static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId) +{ + if(dump_x86_inst) + ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p", + blockId, stream - streamMethodStart, stream); +#ifndef PREDICTED_CHAINING + //assume rPC for callee->insns in %ebx + scratchRegs[0] = PhysicalReg_EAX; + call_dvmJitToInterpTraceSelectNoChain(); +#else + /* make sure section for predicited chaining cell is 4-byte aligned */ + //int padding = (4 - ((u4)stream & 3)) & 3; + //stream += padding; + int* streamData = (int*)stream; + /* Should not be executed in the initial state */ + streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT; + streamData[1] = 0; + /* To be filled: class */ + streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT; + /* To be filled: method */ + streamData[3] = PREDICTED_CHAIN_METHOD_INIT; + /* + * Rechain count. The initial value of 0 here will trigger chaining upon + * the first invocation of this callsite. 
+ */ + streamData[4] = PREDICTED_CHAIN_COUNTER_INIT; +#if 0 + ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)), + *((int*)(stream+8)), *((int*)(stream+12))); +#endif + stream += 20; //5 *4 +#endif +} + +/* Load the Dalvik PC into r0 and jump to the specified target */ +static void handlePCReconstruction(CompilationUnit *cUnit, + LowOpBlockLabel *targetLabel) +{ +#if 0 + LowOp **pcrLabel = + (LowOp **) cUnit->pcReconstructionList.elemList; + int numElems = cUnit->pcReconstructionList.numUsed; + int i; + for (i = 0; i < numElems; i++) { + dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]); + /* r0 = dalvik PC */ + loadConstant(cUnit, r0, pcrLabel[i]->operands[0]); + genUnconditionalBranch(cUnit, targetLabel); + } +#endif +} + +//use O0 code generator for hoisted checks outside of the loop +/* + * vA = arrayReg; + * vB = idxReg; + * vC = endConditionReg; + * arg[0] = maxC + * arg[1] = minC + * arg[2] = loopBranchConditionCode + */ +#define P_GPR_1 PhysicalReg_EBX +#define P_GPR_2 PhysicalReg_ECX +static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) +{ + /* + * NOTE: these synthesized blocks don't have ssa names assigned + * for Dalvik registers. However, because they dominate the following + * blocks we can simply use the Dalvik name w/ subscript 0 as the + * ssa name. + */ + DecodedInstruction *dInsn = &mir->dalvikInsn; + const int maxC = dInsn->arg[0]; + + /* assign array in virtual register to P_GPR_1 */ + get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); + /* assign index in virtual register to P_GPR_2 */ + get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true); + export_pc(); + compare_imm_reg(OpndSize_32, 0, P_GPR_1, true); + condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId); + int delta = maxC; + /* + * If the loop end condition is ">=" instead of ">", then the largest value + * of the index is "endCondition - 1". 
+ */ + if (dInsn->arg[2] == OP_IF_GE) { + delta--; + } + + if (delta < 0) { //+delta + //if P_GPR_2 is mapped to a VR, we can't do this + alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true); + } else if(delta > 0) { + alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true); + } + compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true); + condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId); +} + +/* + * vA = arrayReg; + * vB = idxReg; + * vC = endConditionReg; + * arg[0] = maxC + * arg[1] = minC + * arg[2] = loopBranchConditionCode + */ +static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + const int maxC = dInsn->arg[0]; + + /* assign array in virtual register to P_GPR_1 */ + get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); + /* assign index in virtual register to P_GPR_2 */ + get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true); + export_pc(); + compare_imm_reg(OpndSize_32, 0, P_GPR_1, true); + condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId); + + if (maxC < 0) { + //if P_GPR_2 is mapped to a VR, we can't do this + alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true); + } else if(maxC > 0) { + alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true); + } + compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true); + condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId); + +} +#undef P_GPR_1 +#undef P_GPR_2 + +/* + * vA = idxReg; + * vB = minC; + */ +#define P_GPR_1 PhysicalReg_ECX +static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir) +{ + DecodedInstruction *dInsn = &mir->dalvikInsn; + const int minC = dInsn->vB; + get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //array + export_pc(); + compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true); + condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId); +} +#undef P_GPR_1 + +#ifdef WITH_JIT_INLINING +static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir) +{ + CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo; + if(gDvm.executionMode == kExecutionModeNcgO0) { + get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true); + move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true); + compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true); + export_pc(); //use %edx + conditional_jump_global_API(, Condition_E, "common_errNullObject", false); + move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true); + compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true); + } else { + get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false); + move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false); + nullCheck(5, false, 1, mir->dalvikInsn.vC); + move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false); + compare_reg_reg(4, false, 6, false); + } + + //immdiate will be updated later in genLandingPadForMispredictedCallee + streamMisPred = stream; + callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8); +} +#endif + +/* Extended MIR instructions like PHI */ +void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir) +{ + ExecutionMode origMode = gDvm.executionMode; + gDvm.executionMode = kExecutionModeNcgO0; + switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) { + case kMirOpPhi: { + break; + } + case kMirOpNullNRangeUpCheck: { + 
genHoistedChecksForCountUpLoop(cUnit, mir); + break; + } + case kMirOpNullNRangeDownCheck: { + genHoistedChecksForCountDownLoop(cUnit, mir); + break; + } + case kMirOpLowerBound: { + genHoistedLowerBoundCheck(cUnit, mir); + break; + } + case kMirOpPunt: { + break; + } +#ifdef WITH_JIT_INLINING + case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c + genValidationForPredictedInline(cUnit, mir); + break; + } +#endif + default: + break; + } + gDvm.executionMode = origMode; +} + +static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry, + int bodyId) +{ + /* + * Next, create two branches - one branch over to the loop body and the + * other branch to the PCR cell to punt. + */ + //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId); + //setupResourceMasks(branchToBody); + //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody); + +#if 0 + LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true); + branchToPCR->opCode = kThumbBUncond; + branchToPCR->generic.target = (LIR *) pcrLabel; + setupResourceMasks(branchToPCR); + cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR; +#endif +} + +/* check whether we can merge the block at index i with its target block */ +bool mergeBlock(BasicBlock *bb) { + if(bb->blockType == kDalvikByteCode && + bb->firstMIRInsn != NULL && + (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 || + bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO || + bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) && + bb->fallThrough == NULL) {// && + //cUnit->hasLoop) { + //ALOGI("merge blocks ending with goto at index %d", i); + MIR* prevInsn = bb->lastMIRInsn->prev; + if(bb->taken == NULL) return false; + MIR* mergeInsn = bb->taken->firstMIRInsn; + if(mergeInsn == NULL) return false; + if(prevInsn == NULL) {//the block has a single instruction + bb->firstMIRInsn = mergeInsn; + } else { + prevInsn->next = mergeInsn; //remove goto from the chain + } + mergeInsn->prev = prevInsn; + bb->lastMIRInsn = bb->taken->lastMIRInsn; + bb->taken->firstMIRInsn = NULL; //block being merged in + bb->fallThrough = bb->taken->fallThrough; + bb->taken = bb->taken->taken; + return true; + } + return false; +} + +static int genTraceProfileEntry(CompilationUnit *cUnit) +{ + cUnit->headerSize = 6; + if ((gDvmJit.profileMode == kTraceProfilingContinuous) || + (gDvmJit.profileMode == kTraceProfilingDisabled)) { + return 12; + } else { + return 4; + } + +} + +#define PRINT_BUFFER_LEN 1024 +/* Print the code block in code cache in the range of [startAddr, endAddr) + * in readable format. 
+ */ +void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr) +{ + char strbuf[PRINT_BUFFER_LEN]; + unsigned char *addr; + unsigned char *next_addr; + int n; + + if (gDvmJit.printBinary) { + // print binary in bytes + n = 0; + for (addr = startAddr; addr < endAddr; addr++) { + n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr); + if (n > PRINT_BUFFER_LEN - 10) { + ALOGD("## %s", strbuf); + n = 0; + } + } + if (n > 0) + ALOGD("## %s", strbuf); + } + + // print disassembled instructions + addr = startAddr; + while (addr < endAddr) { + next_addr = reinterpret_cast<unsigned char*> + (decoder_disassemble_instr(reinterpret_cast<char*>(addr), + strbuf, PRINT_BUFFER_LEN)); + if (addr != next_addr) { + ALOGD("** %p: %s", addr, strbuf); + } else { // check whether this is nop padding + if (addr[0] == 0x90) { + ALOGD("** %p: NOP (1 byte)", addr); + next_addr += 1; + } else if (addr[0] == 0x66 && addr[1] == 0x90) { + ALOGD("** %p: NOP (2 bytes)", addr); + next_addr += 2; + } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) { + ALOGD("** %p: NOP (3 bytes)", addr); + next_addr += 3; + } else { + ALOGD("** unable to decode binary at %p", addr); + break; + } + } + addr = next_addr; + } +} + +/* 4 is the number of additional bytes needed for chaining information for trace: + * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */ +#define EXTRA_BYTES_FOR_CHAINING 4 + +/* Entry function to invoke the backend of the JIT compiler */ +void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info) +{ + dump_x86_inst = cUnit->printMe; + /* Used to hold the labels of each block */ + LowOpBlockLabel *labelList = + (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c + LowOp *headLIR = NULL; + GrowableList chainingListByType[kChainingCellLast]; + unsigned int i, padding; + + /* + * Initialize various types chaining lists. + */ + for (i = 0; i < kChainingCellLast; i++) { + dvmInitGrowableList(&chainingListByType[i], 2); + } + + /* Clear the visited flag for each block */ + dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag, + kAllNodes, false /* isIterative */); + + GrowableListIterator iterator; + dvmGrowableListIteratorInit(&cUnit->blockList, &iterator); + + /* Traces start with a profiling entry point. Generate it here */ + cUnit->profileCodeSize = genTraceProfileEntry(cUnit); + + //BasicBlock **blockList = cUnit->blockList; + GrowableList *blockList = &cUnit->blockList; + BasicBlock *bb; + + info->codeAddress = NULL; + stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed; + + // TODO: compile into a temporary buffer and then copy into the code cache. + // That would let us leave the code cache unprotected for a shorter time. + size_t unprotected_code_cache_bytes = + gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING; + UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); + + streamStart = stream; /* trace start before alignment */ + stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. 
Add the bytes before the alignment */ + stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */ + streamMethodStart = stream; /* code start */ + for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) { + labelList[i].lop.generic.offset = -1; + } + cUnit->exceptionBlockId = -1; + for (i = 0; i < blockList->numUsed; i++) { + bb = (BasicBlock *) blockList->elemList[i]; + if(bb->blockType == kExceptionHandling) + cUnit->exceptionBlockId = i; + } + startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit); + if(gDvm.executionMode == kExecutionModeNcgO1) { + //merge blocks ending with "goto" with the fall through block + if (cUnit->jitMode != kJitLoop) + for (i = 0; i < blockList->numUsed; i++) { + bb = (BasicBlock *) blockList->elemList[i]; + bool merged = mergeBlock(bb); + while(merged) merged = mergeBlock(bb); + } + for (i = 0; i < blockList->numUsed; i++) { + bb = (BasicBlock *) blockList->elemList[i]; + if(bb->blockType == kDalvikByteCode && + bb->firstMIRInsn != NULL) { + preprocessingBB(bb); + } + } + preprocessingTrace(); + } + + /* Handle the content in each basic block */ + for (i = 0; ; i++) { + MIR *mir; + bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator); + if (bb == NULL) break; + if (bb->visited == true) continue; + + labelList[i].immOpnd.value = bb->startOffset; + + if (bb->blockType >= kChainingCellLast) { + /* + * Append the label pseudo LIR first. Chaining cells will be handled + * separately afterwards. + */ + dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]); + } + + if (bb->blockType == kEntryBlock) { + labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK; + if (bb->firstMIRInsn == NULL) { + continue; + } else { + setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id); + //&labelList[blockList[i]->fallThrough->id]); + } + } else if (bb->blockType == kExitBlock) { + labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK; + labelList[i].lop.generic.offset = (stream - streamMethodStart); + goto gen_fallthrough; + } else if (bb->blockType == kDalvikByteCode) { + if (bb->hidden == true) continue; + labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL; + /* Reset the register state */ +#if 0 + resetRegisterScoreboard(cUnit); +#endif + } else { + switch (bb->blockType) { + case kChainingCellNormal: + labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellNormal], i); + break; + case kChainingCellInvokeSingleton: + labelList[i].lop.opCode2 = + ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON; + labelList[i].immOpnd.value = + (int) bb->containingMethod; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellInvokeSingleton], i); + break; + case kChainingCellInvokePredicted: + labelList[i].lop.opCode2 = + ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED; + /* + * Move the cached method pointer from operand 1 to 0. + * Operand 0 was clobbered earlier in this routine to store + * the block starting offset, which is not applicable to + * predicted chaining cell. 
+ */ + //TODO + //labelList[i].operands[0] = labelList[i].operands[1]; + + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellInvokePredicted], i); + break; + case kChainingCellHot: + labelList[i].lop.opCode2 = + ATOM_PSEUDO_CHAINING_CELL_HOT; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellHot], i); + break; + case kPCReconstruction: + /* Make sure exception handling block is next */ + labelList[i].lop.opCode2 = + ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL; + //assert (i == cUnit->numBlocks - 2); + labelList[i].lop.generic.offset = (stream - streamMethodStart); + handlePCReconstruction(cUnit, + &labelList[cUnit->puntBlock->id]); + break; + case kExceptionHandling: + labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL; + labelList[i].lop.generic.offset = (stream - streamMethodStart); + //if (cUnit->pcReconstructionList.numUsed) { + scratchRegs[0] = PhysicalReg_EAX; + jumpToInterpPunt(); + //call_dvmJitToInterpPunt(); + //} + break; + case kChainingCellBackwardBranch: + labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH; + /* handle the codegen later */ + dvmInsertGrowableList( + &chainingListByType[kChainingCellBackwardBranch], + i); + break; + default: + break; + } + continue; + } + { + //LowOp *headLIR = NULL; + const DexCode *dexCode = dvmGetMethodCode(cUnit->method); + const u2 *startCodePtr = dexCode->insns; + const u2 *codePtr; + labelList[i].lop.generic.offset = (stream - streamMethodStart); + ALOGV("get ready to handle JIT bb %d type %d hidden %d", + bb->id, bb->blockType, bb->hidden); + for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) { + bb = nextBB; + bb->visited = true; + cUnit->nextCodegenBlock = NULL; + + if(gDvm.executionMode == kExecutionModeNcgO1 && + bb->blockType != kEntryBlock && + bb->firstMIRInsn != NULL) { + startOfBasicBlock(bb); + int cg_ret = codeGenBasicBlockJit(cUnit->method, bb); + endOfBasicBlock(bb); + if(cg_ret < 0) { + endOfTrace(true/*freeOnly*/); + cUnit->baseAddr = NULL; + ALOGI("codeGenBasicBlockJit returns negative number"); + PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); + return; + } + } else { + for (mir = bb->firstMIRInsn; mir; mir = mir->next) { + startOfBasicBlock(bb); //why here for O0 + Opcode dalvikOpCode = mir->dalvikInsn.opcode; + if((int)dalvikOpCode >= (int)kMirOpFirst) { + handleExtendedMIR(cUnit, mir); + continue; + } + InstructionFormat dalvikFormat = + dexGetFormatFromOpcode(dalvikOpCode); + ALOGV("ready to handle bytecode at offset %x: opcode %d format %d", + mir->offset, dalvikOpCode, dalvikFormat); + LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset); + /* Remember the first LIR for this block */ + if (headLIR == NULL) { + headLIR = (LowOp*)boundaryLIR; + } + bool notHandled = true; + /* + * Debugging: screen the opcode first to see if it is in the + * do[-not]-compile list + */ + bool singleStepMe = + gDvmJit.includeSelectedOp != + ((gDvmJit.opList[dalvikOpCode >> 3] & + (1 << (dalvikOpCode & 0x7))) != + 0); + if (singleStepMe || cUnit->allSingleStep) { + } else { + codePtr = startCodePtr + mir->offset; + //lower each byte code, update LIR + notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir); + if(gDvmJit.codeCacheByteUsed + (stream - streamStart) + + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) { + ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart)); + gDvmJit.codeCacheFull = 
true; + cUnit->baseAddr = NULL; + endOfTrace(true/*freeOnly*/); + PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); + return; + } + } + if (notHandled) { + ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled", + mir->offset, + dalvikOpCode, dexGetOpcodeName(dalvikOpCode), + dalvikFormat); + dvmAbort(); + break; + } + } // end for + } // end else //JIT + O0 code generator + } + } // end for + /* Eliminate redundant loads/stores and delay stores into later slots */ +#if 0 + dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR, + cUnit->lastLIRInsn); +#endif + if (headLIR) headLIR = NULL; +gen_fallthrough: + /* + * Check if the block is terminated due to trace length constraint - + * insert an unconditional branch to the chaining cell. + */ + if (bb->needFallThroughBranch) { + jumpToBasicBlock(stream, bb->fallThrough->id); + } + + } + + char* streamChainingStart = (char*)stream; + /* Handle the chaining cells in predefined order */ + for (i = 0; i < kChainingCellGap; i++) { + size_t j; + int *blockIdList = (int *) chainingListByType[i].elemList; + + cUnit->numChainingCells[i] = chainingListByType[i].numUsed; + + /* No chaining cells of this type */ + if (cUnit->numChainingCells[i] == 0) + continue; + + /* Record the first LIR for a new type of chaining cell */ + cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]]; + for (j = 0; j < chainingListByType[i].numUsed; j++) { + int blockId = blockIdList[j]; + BasicBlock *chainingBlock = + (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, + blockId); + + labelList[blockId].lop.generic.offset = (stream - streamMethodStart); + + /* Align this chaining cell first */ +#if 0 + newLIR0(cUnit, ATOM_PSEUDO_ALIGN4); +#endif + /* Insert the pseudo chaining instruction */ + dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]); + + + switch (chainingBlock->blockType) { + case kChainingCellNormal: + handleNormalChainingCell(cUnit, + chainingBlock->startOffset, blockId, labelList); + break; + case kChainingCellInvokeSingleton: + handleInvokeSingletonChainingCell(cUnit, + chainingBlock->containingMethod, blockId, labelList); + break; + case kChainingCellInvokePredicted: + handleInvokePredictedChainingCell(cUnit, blockId); + break; + case kChainingCellHot: + handleHotChainingCell(cUnit, + chainingBlock->startOffset, blockId, labelList); + break; + case kChainingCellBackwardBranch: + handleBackwardBranchChainingCell(cUnit, + chainingBlock->startOffset, blockId, labelList); + break; + default: + ALOGE("Bad blocktype %d", chainingBlock->blockType); + dvmAbort(); + break; + } + + if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) { + ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart)); + gDvmJit.codeCacheFull = true; + cUnit->baseAddr = NULL; + endOfTrace(true); /* need to free structures */ + PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); + return; + } + } + } +#if 0 + dvmCompilerApplyGlobalOptimizations(cUnit); +#endif + endOfTrace(false); + + if (gDvmJit.codeCacheFull) { + /* We hit code cache size limit inside endofTrace(false). + * Bail out for this trace! 
+ */ + ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart)); + cUnit->baseAddr = NULL; + PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); + return; + } + + /* dump section for chaining cell counts, make sure it is 4-byte aligned */ + padding = (4 - ((u4)stream & 3)) & 3; + stream += padding; + ChainCellCounts chainCellCounts; + /* Install the chaining cell counts */ + for (i=0; i< kChainingCellGap; i++) { + chainCellCounts.u.count[i] = cUnit->numChainingCells[i]; + } + char* streamCountStart = (char*)stream; + memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts)); + stream += sizeof(chainCellCounts); + + cUnit->baseAddr = streamMethodStart; + cUnit->totalSize = (stream - streamStart); + if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) { + ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart)); + gDvmJit.codeCacheFull = true; + cUnit->baseAddr = NULL; + PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); + return; + } + + /* write chaining cell count offset & chaining cell offset */ + u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */ + *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */ + pOffset[1] = streamChainingStart - streamMethodStart; + + PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); + + gDvmJit.codeCacheByteUsed += (stream - streamStart); + if (cUnit->printMe) { + unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr; + unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed; + ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p", + cUnit->method->clazz->descriptor, cUnit->method->name, + codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache); + ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor, + cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset); + printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext); + } + ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr, + (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed, + cUnit->totalSize, gDvmJit.codeCache); + + gDvmJit.numCompilations++; + + info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize; +} + +/* + * Perform translation chain operation. + */ +void* dvmJitChain(void* tgtAddr, u4* branchAddr) +{ +#ifdef JIT_CHAIN + int relOffset = (int) tgtAddr - (int)branchAddr; + + if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) && + (gDvmJit.codeCacheFull == false)) { + + gDvmJit.translationChains++; + + //OpndSize immSize = estOpndSizeFromImm(relOffset); + //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond); + /* Hard coded the jump opnd size to 32 bits, This instruction will replace the "jump 0" in + * the original code sequence. + */ + OpndSize immSize = OpndSize_32; + relOffset -= 5; + //can't use stream here since it is used by the compilation thread + UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr)); + dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr + PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr)); + + gDvmJit.hasNewChain = true; + + COMPILER_TRACE_CHAINING( + ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x", + (int) branchAddr, tgtAddr, relOffset)); + } +#endif + return tgtAddr; +} + +/* + * Accept the work and start compiling. Returns true if compilation + * is attempted. 
+ */ +bool dvmCompilerDoWork(CompilerWorkOrder *work) +{ + JitTraceDescription *desc; + bool isCompile; + bool success = true; + + if (gDvmJit.codeCacheFull) { + return false; + } + + switch (work->kind) { + case kWorkOrderTrace: + isCompile = true; + /* Start compilation with maximally allowed trace length */ + desc = (JitTraceDescription *)work->info; + success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result, + work->bailPtr, 0 /* no hints */); + break; + case kWorkOrderTraceDebug: { + bool oldPrintMe = gDvmJit.printMe; + gDvmJit.printMe = true; + isCompile = true; + /* Start compilation with maximally allowed trace length */ + desc = (JitTraceDescription *)work->info; + success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result, + work->bailPtr, 0 /* no hints */); + gDvmJit.printMe = oldPrintMe; + break; + } + case kWorkOrderProfileMode: + dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info); + isCompile = false; + break; + default: + isCompile = false; + ALOGE("Jit: unknown work order type"); + assert(0); // Bail if debug build, discard otherwise + } + if (!success) + work->result.codeAddress = NULL; + return isCompile; +} + +void dvmCompilerCacheFlush(long start, long end, long flags) { + /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */ +} + +//#endif |
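The two u2 words written just before the trace entry (the EXTRA_BYTES_FOR_CHAINING slot filled at the end of dvmCompilerMIR2LIR) are what dvmJitUnchain reads back at codeAddr - 4 and codeAddr - 2. A minimal standalone sketch of that lookup, assuming plain uint16_t in place of the u2 typedef and only a forward declaration of ChainCellCounts (the real definition lives in compiler/CompilerIR.h):

    #include <stdint.h>

    struct ChainCellCounts;   // real definition: compiler/CompilerIR.h

    // [codeAddr - 4]: u2 offset from codeAddr to the ChainCellCounts record
    // [codeAddr - 2]: u2 offset from codeAddr to the first chaining cell
    static inline const ChainCellCounts* traceChainCellCounts(const void* codeAddr)
    {
        const uint16_t* p =
            reinterpret_cast<const uint16_t*>(static_cast<const char*>(codeAddr) - 4);
        return reinterpret_cast<const ChainCellCounts*>(
            static_cast<const char*>(codeAddr) + p[0]);
    }

    static inline const char* traceChainCellRegion(const void* codeAddr)
    {
        const uint16_t* p =
            reinterpret_cast<const uint16_t*>(static_cast<const char*>(codeAddr) - 4);
        return static_cast<const char*>(codeAddr) + p[1];
    }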

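The chaining performed by dvmJitChain amounts to overwriting the 5-byte "jmp 0" placeholder emitted by insertJumpHelp with a JMP rel32 whose displacement is measured from the end of the instruction, which is why the code subtracts 5 from relOffset before encoding. A minimal sketch of that arithmetic, with a hypothetical patchJmpRel32 helper standing in for dump_imm_with_codeaddr and the UNPROTECT_CODE_CACHE/PROTECT_CODE_CACHE wrapping used by the real code:

    #include <stdint.h>
    #include <string.h>

    static void patchJmpRel32(uint8_t* branchAddr, const void* tgtAddr)
    {
        // Displacement is relative to the byte after the 5-byte JMP instruction.
        int32_t relOffset =
            (int32_t)((intptr_t)tgtAddr - (intptr_t)branchAddr) - 5;
        branchAddr[0] = 0xE9;                  // JMP rel32 opcode
        memcpy(branchAddr + 1, &relOffset, 4); // little-endian imm32 on x86
    }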