Diffstat (limited to 'vm/compiler/codegen/x86/CodegenInterface.cpp')
-rw-r--r--  vm/compiler/codegen/x86/CodegenInterface.cpp  1532
1 file changed, 1532 insertions(+), 0 deletions(-)
diff --git a/vm/compiler/codegen/x86/CodegenInterface.cpp b/vm/compiler/codegen/x86/CodegenInterface.cpp
new file mode 100644
index 000000000..aade180e0
--- /dev/null
+++ b/vm/compiler/codegen/x86/CodegenInterface.cpp
@@ -0,0 +1,1532 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <sys/mman.h>
+#include "Dalvik.h"
+#include "libdex/DexOpcodes.h"
+#include "compiler/Compiler.h"
+#include "compiler/CompilerIR.h"
+#include "interp/Jit.h"
+#include "libdex/DexFile.h"
+#include "Lower.h"
+#include "NcgAot.h"
+#include "compiler/codegen/CompilerCodegen.h"
+
+/* Initial value for the branch field when a predicted chaining cell is first
+ * assembled; 0xE7FE is the ARM "branch to self" encoding, kept here as the
+ * uninitialized-cell sentinel. */
+#define PREDICTED_CHAIN_BX_PAIR_INIT 0xe7fe
+
+/* Target-specific save/restore */
+extern "C" void dvmJitCalleeSave(double *saveArea);
+extern "C" void dvmJitCalleeRestore(double *saveArea);
+
+/*
+ * Determine the initial instruction set to be used for this trace.
+ * Later components may decide to change this.
+ */
+//JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
+JitInstructionSetType dvmCompilerInstructionSet(void)
+{
+ return DALVIK_JIT_IA32;
+}
+
+JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
+{
+ return DALVIK_JIT_IA32;
+}
+
+/* We don't use an interpret template for IA32 */
+void *dvmCompilerGetInterpretTemplate()
+{
+ return NULL;
+}
+
+/* Track the number of times that the code cache is patched */
+#if defined(WITH_JIT_TUNING)
+#define UPDATE_CODE_CACHE_PATCHES() (gDvmJit.codeCachePatches++)
+#else
+#define UPDATE_CODE_CACHE_PATCHES()
+#endif
+
+bool dvmCompilerArchInit() {
+ /* Target-specific configuration */
+ gDvmJit.jitTableSize = 1 << 12;
+ gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
+ gDvmJit.threshold = 255;
+ gDvmJit.codeCacheSize = 512*1024;
+ gDvmJit.optLevel = kJitOptLevelO1;
+
+#if defined(WITH_SELF_VERIFICATION)
+ /* Force into blocking mode */
+ gDvmJit.blockingMode = true;
+ gDvm.nativeDebuggerActive = true;
+#endif
+
+ // Make sure all threads have current values
+ dvmJitUpdateThreadStateAll();
+
+ return true;
+}
+
+void dvmCompilerPatchInlineCache(void)
+{
+ int i;
+ PredictedChainingCell *minAddr, *maxAddr;
+
+ /* Nothing to be done */
+ if (gDvmJit.compilerICPatchIndex == 0) return;
+
+ /*
+ * Since all threads are already stopped we don't really need to acquire
+ * the lock. But race condition can be easily introduced in the future w/o
+ * paying attention so we still acquire the lock here.
+ */
+ dvmLockMutex(&gDvmJit.compilerICPatchLock);
+
+ UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
+
+ //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);
+
+ /* Initialize the min/max address range */
+ minAddr = (PredictedChainingCell *)
+ ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
+ maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;
+
+ for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
+ ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
+ PredictedChainingCell *cellAddr = workOrder->cellAddr;
+ PredictedChainingCell *cellContent = &workOrder->cellContent;
+ ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
+ workOrder->classLoader);
+
+ assert(clazz->serialNumber == workOrder->serialNumber);
+
+ /* Use the newly resolved clazz pointer */
+ cellContent->clazz = clazz;
+
+ if (cellAddr->clazz == NULL) {
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
+ cellAddr,
+ cellContent->clazz->descriptor,
+ cellContent->method->name));
+ } else {
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
+ "patched",
+ cellAddr,
+ cellAddr->clazz->descriptor,
+ cellContent->clazz->descriptor,
+ cellContent->method->name));
+ }
+
+ /* Patch the chaining cell */
+ *cellAddr = *cellContent;
+ minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
+ maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
+ }
+
+ PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
+
+ gDvmJit.compilerICPatchIndex = 0;
+ dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
+}
+
+/* Target-specific cache clearing */
+void dvmCompilerCacheClear(char *start, size_t size)
+{
+ /* "0xFF 0xFF" is an invalid opcode for x86. */
+ memset(start, 0xFF, size);
+}
+
+/* for JIT debugging, to be implemented */
+void dvmJitCalleeSave(double *saveArea) {
+}
+
+void dvmJitCalleeRestore(double *saveArea) {
+}
+
+void dvmJitToInterpSingleStep() {
+}
+
+JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
+ const JitEntry *knownEntry) {
+ return NULL;
+}
+
+void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
+{
+}
+
+void dvmCompilerArchDump(void)
+{
+}
+
+char *getTraceBase(const JitEntry *p)
+{
+ return NULL;
+}
+
+void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
+{
+}
+
+void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
+{
+}
+
+void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
+{
+ ALOGE("Method-based JIT not supported for the x86 target");
+ dvmAbort();
+}
+
+void dvmJitScanAllClassPointers(void (*callback)(void *))
+{
+}
+
+/* Handy function to retrieve the profile count */
+static inline int getProfileCount(const JitEntry *entry)
+{
+ if (entry->dPC == 0 || entry->codeAddress == 0)
+ return 0;
+ u4 *pExecutionCount = (u4 *) getTraceBase(entry);
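+    /* Note: getTraceBase() is currently a stub on x86 and returns NULL, so
+     * this evaluates to 0 for every entry. */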
+
+ return pExecutionCount ? *pExecutionCount : 0;
+}
+
+/* qsort callback function */
+static int sortTraceProfileCount(const void *entry1, const void *entry2)
+{
+ const JitEntry *jitEntry1 = (const JitEntry *)entry1;
+ const JitEntry *jitEntry2 = (const JitEntry *)entry2;
+
+ JitTraceCounter_t count1 = getProfileCount(jitEntry1);
+ JitTraceCounter_t count2 = getProfileCount(jitEntry2);
+ return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
+}
+
+/* Sort the trace profile counts and dump them */
+void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
+{
+ JitEntry *sortedEntries;
+ int numTraces = 0;
+ unsigned long counts = 0;
+ unsigned int i;
+
+ /* Make sure that the table is not changing */
+ dvmLockMutex(&gDvmJit.tableLock);
+
+ /* Sort the entries by descending order */
+ sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
+ if (sortedEntries == NULL)
+ goto done;
+ memcpy(sortedEntries, gDvmJit.pJitEntryTable,
+ sizeof(JitEntry) * gDvmJit.jitTableSize);
+ qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
+ sortTraceProfileCount);
+
+    /* Count the live entries and accumulate their execution counts */
+    for (i=0; i < gDvmJit.jitTableSize; i++) {
+        if (sortedEntries[i].dPC != 0) {
+            numTraces++;
+            counts += getProfileCount(&sortedEntries[i]);
+        }
+    }
+ if (numTraces == 0)
+ numTraces = 1;
+ ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));
+
+ free(sortedEntries);
+done:
+ dvmUnlockMutex(&gDvmJit.tableLock);
+ return;
+}
+
+void jumpWithRelOffset(char* instAddr, int relOffset) {
+ stream = instAddr;
+ OpndSize immSize = estOpndSizeFromImm(relOffset);
+ relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
+ dump_imm(Mnemonic_JMP, immSize, relOffset);
+}
+
+// Works whether or not instructions for the target basic block have been generated yet
+LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
+ stream = instAddr;
+ bool unknown;
+ OpndSize size;
+ int relativeNCG = targetId;
+ relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
+ unconditional_jump_int(relativeNCG, size);
+ return NULL;
+}
+
+LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
+ stream = instAddr;
+ bool unknown;
+ OpndSize size;
+ int relativeNCG = targetId;
+ relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
+ conditional_jump_int(cc, relativeNCG, size);
+ return NULL;
+}
+
+/*
+ * Attempt to enqueue a work order to patch an inline cache for a predicted
+ * chaining cell for virtual/interface calls.
+ */
+static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
+ PredictedChainingCell *newContent)
+{
+ bool result = true;
+
+ /*
+     * Make sure only one thread gets here, since updating the cell (ie the
+     * fast path) and queueing the request (ie the queued path) have to be
+     * done in an atomic fashion.
+ */
+ dvmLockMutex(&gDvmJit.compilerICPatchLock);
+
+ /* Fast path for uninitialized chaining cell */
+ if (cellAddr->clazz == NULL &&
+ cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
+ UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
+
+ cellAddr->method = newContent->method;
+ cellAddr->branch = newContent->branch;
+ cellAddr->branch2 = newContent->branch2;
+
+ /*
+ * The update order matters - make sure clazz is updated last since it
+ * will bring the uninitialized chaining cell to life.
+ */
+ android_atomic_release_store((int32_t)newContent->clazz,
+ (volatile int32_t *)(void*) &cellAddr->clazz);
+ //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
+ UPDATE_CODE_CACHE_PATCHES();
+
+ PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
+
+#if 0
+ MEM_BARRIER();
+ cellAddr->clazz = newContent->clazz;
+ //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
+#endif
+#if defined(IA_JIT_TUNING)
+ gDvmJit.icPatchInit++;
+#endif
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
+ cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
+ /* Check if this is a frequently missed clazz */
+ } else if (cellAddr->stagedClazz != newContent->clazz) {
+ /* Not proven to be frequent yet - build up the filter cache */
+ UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
+
+ cellAddr->stagedClazz = newContent->clazz;
+
+ UPDATE_CODE_CACHE_PATCHES();
+ PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
+
+#if defined(WITH_JIT_TUNING)
+ gDvmJit.icPatchRejected++;
+#endif
+ /*
+ * Different classes but same method implementation - it is safe to just
+ * patch the class value without the need to stop the world.
+ */
+ } else if (cellAddr->method == newContent->method) {
+ UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
+
+ cellAddr->clazz = newContent->clazz;
+ /* No need to flush the cache here since the branch is not patched */
+ UPDATE_CODE_CACHE_PATCHES();
+
+ PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
+
+#if defined(WITH_JIT_TUNING)
+ gDvmJit.icPatchLockFree++;
+#endif
+ /*
+ * Cannot patch the chaining cell inline - queue it until the next safe
+ * point.
+ */
+ } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
+ int index = gDvmJit.compilerICPatchIndex++;
+ const ClassObject *clazz = newContent->clazz;
+
+ gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
+ gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
+ gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
+ gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
+ /* For verification purpose only */
+ gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;
+
+#if defined(WITH_JIT_TUNING)
+ gDvmJit.icPatchQueued++;
+#endif
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
+ cellAddr, newContent->clazz->descriptor, newContent->method->name));
+ } else {
+ /* Queue is full - just drop this patch request */
+#if defined(WITH_JIT_TUNING)
+ gDvmJit.icPatchDropped++;
+#endif
+
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
+ cellAddr, newContent->clazz->descriptor, newContent->method->name));
+ }
+
+ dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
+ return result;
+}
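+/*
+ * The fast path above relies on publish-last ordering: method/branch/branch2
+ * are written first, and clazz is stored last with release semantics so that
+ * a racing mutator never sees a live clazz with stale payload fields. A
+ * minimal sketch of the same pattern (illustration only; ExampleCell and
+ * publishCell are hypothetical names, not part of this file):
+ *
+ *   #include <atomic>
+ *   struct ExampleCell {
+ *       const void *method;
+ *       int branch;
+ *       std::atomic<const void*> clazz;   // the key that brings the cell to life
+ *   };
+ *   static void publishCell(ExampleCell *cell, const void *m, int b,
+ *                           const void *c) {
+ *       cell->method = m;                                // payload first
+ *       cell->branch = b;
+ *       cell->clazz.store(c, std::memory_order_release); // key last
+ *   }
+ */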
+
+/*
+ * This method is called from the invoke templates for virtual and interface
+ * methods to speculatively set up a chain to the callee. The templates are
+ * written in assembly and have set up method, cell, and clazz at r0, r2, and
+ * r3 respectively, so there is an unused argument in the list. Upon return one
+ * of the following three results may happen:
+ *   1) Chain is not set up because the callee is native. Reset the rechain
+ *      count to a big number so that it will take a long time before the next
+ *      rechain attempt happens.
+ *   2) Chain is not set up because the callee has not been compiled yet. Reset
+ *      the rechain count to a small number and retry in the near future.
+ *   3) Ask all other threads to stop before patching this chaining cell.
+ *      This is required because another thread may have passed the class check
+ *      but not yet reached the chaining cell to follow the chain. If we patch
+ *      the content before halting that thread, there is a small window in
+ *      which it could follow the new but wrong chain and invoke a different
+ *      method.
+ */
+const Method *dvmJitToPatchPredictedChain(const Method *method,
+ Thread *self,
+ PredictedChainingCell *cell,
+ const ClassObject *clazz)
+{
+ int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
+ /* Don't come back here for a long time if the method is native */
+ if (dvmIsNativeMethod(method)) {
+ UNPROTECT_CODE_CACHE(cell, sizeof(*cell));
+
+ /*
+ * Put a non-zero/bogus value in the clazz field so that it won't
+ * trigger immediate patching and will continue to fail to match with
+ * a real clazz pointer.
+ */
+ cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;
+
+ UPDATE_CODE_CACHE_PATCHES();
+ PROTECT_CODE_CACHE(cell, sizeof(*cell));
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
+ cell, method->name));
+ goto done;
+ }
+ {
+ int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);
+
+ /*
+         * The callee has not been compiled yet. Reset the counter to a small
+         * value and come back to check soon.
+ */
+ if ((tgtAddr == 0) ||
+ ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
+ cell, method->clazz->descriptor, method->name));
+ goto done;
+ }
+
+ PredictedChainingCell newCell;
+
+ if (cell->clazz == NULL) {
+ newRechainCount = self->icRechainCount;
+ }
+
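+    /*
+     * x86 rel32 jump displacements are relative to the end of the jmp
+     * instruction, so the emitted target is (callee - cell) minus the size
+     * of the jmp itself.
+     */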
+ int relOffset = (int) tgtAddr - (int)cell;
+ OpndSize immSize = estOpndSizeFromImm(relOffset);
+ int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
+ relOffset -= jumpSize;
+ COMPILER_TRACE_CHAINING(
+ ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
+ cell, method->clazz->descriptor, method->name, jumpSize));
+ //can't use stream here since it is used by the compilation thread
+ dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch
+
+ newCell.clazz = clazz;
+ newCell.method = method;
+
+ /*
+     * Add the work order to the queue; the chaining cell will be patched the
+     * next time a safe point is entered.
+     *
+     * If the enqueuing fails, reset the rechain count to a normal value so
+     * that it won't get indefinitely delayed.
+ */
+ inlineCachePatchEnqueue(cell, &newCell);
+ }
+done:
+ self->icRechainCount = newRechainCount;
+ return method;
+}
+
+/*
+ * Unchain a trace given the starting address of the translation
+ * in the code cache. Refer to the trace layout emitted by dvmCompilerMIR2LIR.
+ * For ARM, it returns the address following the last cell unchained.
+ * For IA, it returns NULL since cacheflush is not required for IA.
+ */
+u4* dvmJitUnchain(void* codeAddr)
+{
+ /* codeAddr is 4-byte aligned, so is chain cell count offset */
+ u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
+ u2 chainCellCountOffset = *pChainCellCountOffset;
+ /* chain cell counts information is 4-byte aligned */
+ ChainCellCounts *pChainCellCounts =
+ (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
+ u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
+ u2 chainCellOffset = *pChainCellOffset;
+ u1* pChainCells;
+ int i,j;
+ PredictedChainingCell *predChainCell;
+ int padding;
+
+ /* Locate the beginning of the chain cell region */
+ pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);
+
+ /* The cells are sorted in order - walk through them and reset */
+ for (i = 0; i < kChainingCellGap; i++) {
+ /* for hot, normal, singleton chaining:
+ nop //padding.
+ jmp 0
+ mov imm32, reg1
+ mov imm32, reg2
+ call reg2
+ after chaining:
+ nop
+ jmp imm
+ mov imm32, reg1
+ mov imm32, reg2
+ call reg2
+ after unchaining:
+ nop
+ jmp 0
+ mov imm32, reg1
+ mov imm32, reg2
+ call reg2
+           Space occupied by a chaining cell: the nops are padding so that the
+           4-byte target operand of "jmp 0" is 4-byte aligned.
+           Space for a predicted chaining cell: 5 words = 20 bytes
+ */
+ int elemSize = 0;
+ if (i == kChainingCellInvokePredicted) {
+ elemSize = 20;
+ }
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));
+
+ for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
+ switch(i) {
+ case kChainingCellNormal:
+ case kChainingCellHot:
+ case kChainingCellInvokeSingleton:
+ case kChainingCellBackwardBranch:
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
+ pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
+ elemSize = 4+5+5+2;
+ memset(pChainCells, 0, 4);
+ break;
+ case kChainingCellInvokePredicted:
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: unchaining of predicted"));
+ /* 4-byte aligned */
+ padding = (4 - ((u4)pChainCells & 3)) & 3;
+ pChainCells += padding;
+ predChainCell = (PredictedChainingCell *) pChainCells;
+ /*
+ * There could be a race on another mutator thread to use
+ * this particular predicted cell and the check has passed
+ * the clazz comparison. So we cannot safely wipe the
+ * method and branch but it is safe to clear the clazz,
+ * which serves as the key.
+ */
+ predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
+ break;
+ default:
+ ALOGE("Unexpected chaining type: %d", i);
+ dvmAbort(); // dvmAbort OK here - can't safely recover
+ }
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
+ pChainCells += elemSize; /* Advance by a fixed number of bytes */
+ }
+ }
+ return NULL;
+}
+
+/* Unchain all translations in the cache. */
+void dvmJitUnchainAll()
+{
+ ALOGV("Jit Runtime: unchaining all");
+ if (gDvmJit.pJitEntryTable != NULL) {
+ COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
+ dvmLockMutex(&gDvmJit.tableLock);
+
+ UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
+
+ for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
+ if (gDvmJit.pJitEntryTable[i].dPC &&
+ !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
+ gDvmJit.pJitEntryTable[i].codeAddress) {
+ dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
+ }
+ }
+
+ PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
+
+ dvmUnlockMutex(&gDvmJit.tableLock);
+ gDvmJit.translationChains = 0;
+ }
+ gDvmJit.hasNewChain = false;
+}
+
+#define P_GPR_1 PhysicalReg_EBX
+/* Add an additional jump instruction, keeping the jump target 4-byte aligned. */
+static void insertJumpHelp()
+{
+ int rem = (uint)stream % 4;
+ int nop_size = 3 - rem;
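+    /* (3 - rem) nops place the 1-byte jmp opcode at an address that is
+     * 3 (mod 4), so its 4-byte rel32 operand is 4-byte aligned; this is the
+     * word dvmJitUnchain later clears back to 0 in one aligned write. */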
+ dump_nop(nop_size);
+ unconditional_jump_int(0, OpndSize_32);
+ return;
+}
+
+/* Chaining cell for code that may need warmup. */
+/* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
+ blx r0
+ data 0xb23a //bytecode address: 0x5115b23a
+ data 0x5115
+ IA32 assembly:
+ jmp 0 //5 bytes
+ movl address, %ebx
+ movl dvmJitToInterpNormal, %eax
+ call %eax
+ <-- return address
+*/
+static void handleNormalChainingCell(CompilationUnit *cUnit,
+ unsigned int offset, int blockId, LowOpBlockLabel* labelList)
+{
+ ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
+ cUnit->method->name, blockId, offset, stream - streamMethodStart);
+ if(dump_x86_inst)
+ ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
+ offset, stream - streamMethodStart, stream);
+    /* Add one additional "jump 0" instruction; it may be modified during JIT
+     * chaining. This helps resolve the multithreading issue.
+ */
+ insertJumpHelp();
+ move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
+ scratchRegs[0] = PhysicalReg_EAX;
+ call_dvmJitToInterpNormal();
+ //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
+}
+
+/*
+ * Chaining cell for instructions that immediately following already translated
+ * code.
+ */
+static void handleHotChainingCell(CompilationUnit *cUnit,
+ unsigned int offset, int blockId, LowOpBlockLabel* labelList)
+{
+ ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
+ cUnit->method->name, blockId, offset, stream - streamMethodStart);
+ if(dump_x86_inst)
+ ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
+ offset, stream - streamMethodStart, stream);
+    /* Add one additional "jump 0" instruction; it may be modified during JIT
+     * chaining. This helps resolve the multithreading issue.
+ */
+ insertJumpHelp();
+ move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
+ scratchRegs[0] = PhysicalReg_EAX;
+ call_dvmJitToInterpTraceSelect();
+ //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
+}
+
+/* Chaining cell for branches that branch back into the same basic block */
+static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
+ unsigned int offset, int blockId, LowOpBlockLabel* labelList)
+{
+ ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
+ cUnit->method->name, blockId, offset, stream - streamMethodStart);
+ if(dump_x86_inst)
+ ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
+ offset, stream - streamMethodStart, stream);
+    /* Add one additional "jump 0" instruction; it may be modified during JIT
+     * chaining. This helps resolve the multithreading issue.
+ */
+ insertJumpHelp();
+ move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
+ scratchRegs[0] = PhysicalReg_EAX;
+ call_dvmJitToInterpNormal();
+ //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
+}
+
+/* Chaining cell for monomorphic method invocations. */
+static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
+ const Method *callee, int blockId, LowOpBlockLabel* labelList)
+{
+ ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
+ cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
+ if(dump_x86_inst)
+ ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
+ blockId, stream - streamMethodStart, stream);
+    /* Add one additional "jump 0" instruction; it may be modified during JIT
+     * chaining. This helps resolve the multithreading issue.
+ */
+ insertJumpHelp();
+ move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
+ scratchRegs[0] = PhysicalReg_EAX;
+ call_dvmJitToInterpTraceSelect();
+ //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
+}
+#undef P_GPR_1
+
+/* Chaining cell for virtual/interface method invocations (predicted chaining). */
+static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
+{
+ if(dump_x86_inst)
+ ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
+ blockId, stream - streamMethodStart, stream);
+#ifndef PREDICTED_CHAINING
+ //assume rPC for callee->insns in %ebx
+ scratchRegs[0] = PhysicalReg_EAX;
+ call_dvmJitToInterpTraceSelectNoChain();
+#else
+    /* make sure the section for the predicted chaining cell is 4-byte aligned */
+ //int padding = (4 - ((u4)stream & 3)) & 3;
+ //stream += padding;
+ int* streamData = (int*)stream;
+ /* Should not be executed in the initial state */
+ streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
+ streamData[1] = 0;
+ /* To be filled: class */
+ streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
+ /* To be filled: method */
+ streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
+ /*
+ * Rechain count. The initial value of 0 here will trigger chaining upon
+ * the first invocation of this callsite.
+ */
+ streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
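+    /* Together these five words form the PredictedChainingCell that
+     * inlineCachePatchEnqueue() and dvmJitUnchain() later reinterpret and
+     * patch in place. */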
+#if 0
+ ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
+ *((int*)(stream+8)), *((int*)(stream+12)));
+#endif
+ stream += 20; //5 *4
+#endif
+}
+
+/* Load the Dalvik PC into r0 and jump to the specified target */
+static void handlePCReconstruction(CompilationUnit *cUnit,
+ LowOpBlockLabel *targetLabel)
+{
+#if 0
+ LowOp **pcrLabel =
+ (LowOp **) cUnit->pcReconstructionList.elemList;
+ int numElems = cUnit->pcReconstructionList.numUsed;
+ int i;
+ for (i = 0; i < numElems; i++) {
+ dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
+ /* r0 = dalvik PC */
+ loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
+ genUnconditionalBranch(cUnit, targetLabel);
+ }
+#endif
+}
+
+//use O0 code generator for hoisted checks outside of the loop
+/*
+ * vA = arrayReg;
+ * vB = idxReg;
+ * vC = endConditionReg;
+ * arg[0] = maxC
+ * arg[1] = minC
+ * arg[2] = loopBranchConditionCode
+ */
+#define P_GPR_1 PhysicalReg_EBX
+#define P_GPR_2 PhysicalReg_ECX
+static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
+{
+ /*
+ * NOTE: these synthesized blocks don't have ssa names assigned
+ * for Dalvik registers. However, because they dominate the following
+ * blocks we can simply use the Dalvik name w/ subscript 0 as the
+ * ssa name.
+ */
+ DecodedInstruction *dInsn = &mir->dalvikInsn;
+ const int maxC = dInsn->arg[0];
+
+ /* assign array in virtual register to P_GPR_1 */
+ get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
+ /* assign index in virtual register to P_GPR_2 */
+ get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
+ export_pc();
+ compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
+ condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
+ int delta = maxC;
+ /*
+ * If the loop end condition is ">=" instead of ">", then the largest value
+ * of the index is "endCondition - 1".
+ */
+ if (dInsn->arg[2] == OP_IF_GE) {
+ delta--;
+ }
+
+ if (delta < 0) { //+delta
+ //if P_GPR_2 is mapped to a VR, we can't do this
+ alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
+ } else if(delta > 0) {
+ alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
+ }
+ compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
+ condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
+}
+
+/*
+ * vA = arrayReg;
+ * vB = idxReg;
+ * vC = endConditionReg;
+ * arg[0] = maxC
+ * arg[1] = minC
+ * arg[2] = loopBranchConditionCode
+ */
+static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
+{
+ DecodedInstruction *dInsn = &mir->dalvikInsn;
+ const int maxC = dInsn->arg[0];
+
+ /* assign array in virtual register to P_GPR_1 */
+ get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
+ /* assign index in virtual register to P_GPR_2 */
+ get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
+ export_pc();
+ compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
+ condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
+
+ if (maxC < 0) {
+ //if P_GPR_2 is mapped to a VR, we can't do this
+ alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
+ } else if(maxC > 0) {
+ alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
+ }
+ compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
+ condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
+
+}
+#undef P_GPR_1
+#undef P_GPR_2
+
+/*
+ * vA = idxReg;
+ * vB = minC;
+ */
+#define P_GPR_1 PhysicalReg_ECX
+static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
+{
+ DecodedInstruction *dInsn = &mir->dalvikInsn;
+ const int minC = dInsn->vB;
+    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index
+ export_pc();
+ compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
+ condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
+}
+#undef P_GPR_1
+
+#ifdef WITH_JIT_INLINING
+static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
+{
+ CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
+ if(gDvm.executionMode == kExecutionModeNcgO0) {
+ get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
+ move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
+ compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
+ export_pc(); //use %edx
+ conditional_jump_global_API(, Condition_E, "common_errNullObject", false);
+ move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
+ compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
+ } else {
+ get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
+ move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
+ nullCheck(5, false, 1, mir->dalvikInsn.vC);
+ move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
+ compare_reg_reg(4, false, 6, false);
+ }
+
+    //immediate will be updated later in genLandingPadForMispredictedCallee
+ streamMisPred = stream;
+ callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
+}
+#endif
+
+/* Extended MIR instructions like PHI */
+void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
+{
+ ExecutionMode origMode = gDvm.executionMode;
+ gDvm.executionMode = kExecutionModeNcgO0;
+ switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
+ case kMirOpPhi: {
+ break;
+ }
+ case kMirOpNullNRangeUpCheck: {
+ genHoistedChecksForCountUpLoop(cUnit, mir);
+ break;
+ }
+ case kMirOpNullNRangeDownCheck: {
+ genHoistedChecksForCountDownLoop(cUnit, mir);
+ break;
+ }
+ case kMirOpLowerBound: {
+ genHoistedLowerBoundCheck(cUnit, mir);
+ break;
+ }
+ case kMirOpPunt: {
+ break;
+ }
+#ifdef WITH_JIT_INLINING
+ case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
+ genValidationForPredictedInline(cUnit, mir);
+ break;
+ }
+#endif
+ default:
+ break;
+ }
+ gDvm.executionMode = origMode;
+}
+
+static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
+ int bodyId)
+{
+ /*
+ * Next, create two branches - one branch over to the loop body and the
+ * other branch to the PCR cell to punt.
+ */
+ //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
+ //setupResourceMasks(branchToBody);
+ //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);
+
+#if 0
+ LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
+ branchToPCR->opCode = kThumbBUncond;
+ branchToPCR->generic.target = (LIR *) pcrLabel;
+ setupResourceMasks(branchToPCR);
+ cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
+#endif
+}
+
+/* Check whether we can merge the given block with its taken target block */
+bool mergeBlock(BasicBlock *bb) {
+ if(bb->blockType == kDalvikByteCode &&
+ bb->firstMIRInsn != NULL &&
+ (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
+ bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
+ bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
+ bb->fallThrough == NULL) {// &&
+ //cUnit->hasLoop) {
+ //ALOGI("merge blocks ending with goto at index %d", i);
+ MIR* prevInsn = bb->lastMIRInsn->prev;
+ if(bb->taken == NULL) return false;
+ MIR* mergeInsn = bb->taken->firstMIRInsn;
+ if(mergeInsn == NULL) return false;
+ if(prevInsn == NULL) {//the block has a single instruction
+ bb->firstMIRInsn = mergeInsn;
+ } else {
+ prevInsn->next = mergeInsn; //remove goto from the chain
+ }
+ mergeInsn->prev = prevInsn;
+ bb->lastMIRInsn = bb->taken->lastMIRInsn;
+ bb->taken->firstMIRInsn = NULL; //block being merged in
+ bb->fallThrough = bb->taken->fallThrough;
+ bb->taken = bb->taken->taken;
+ return true;
+ }
+ return false;
+}
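+/*
+ * Example: if block A ends with "..., goto" and its taken successor B holds
+ * the goto target, merging drops the goto, appends B's MIRs to A, marks B as
+ * merged (firstMIRInsn == NULL), and inherits B's fallThrough/taken edges so
+ * that A and B are lowered as one straight-line region.
+ */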
+
+static int genTraceProfileEntry(CompilationUnit *cUnit)
+{
+ cUnit->headerSize = 6;
+ if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
+ (gDvmJit.profileMode == kTraceProfilingDisabled)) {
+ return 12;
+ } else {
+ return 4;
+ }
+
+}
+
+#define PRINT_BUFFER_LEN 1024
+/* Print the code block in the code cache in the range [startAddr, endAddr)
+ * in readable format.
+ */
+void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
+{
+ char strbuf[PRINT_BUFFER_LEN];
+ unsigned char *addr;
+ unsigned char *next_addr;
+ int n;
+
+ if (gDvmJit.printBinary) {
+ // print binary in bytes
+ n = 0;
+ for (addr = startAddr; addr < endAddr; addr++) {
+ n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
+ if (n > PRINT_BUFFER_LEN - 10) {
+ ALOGD("## %s", strbuf);
+ n = 0;
+ }
+ }
+ if (n > 0)
+ ALOGD("## %s", strbuf);
+ }
+
+ // print disassembled instructions
+ addr = startAddr;
+ while (addr < endAddr) {
+ next_addr = reinterpret_cast<unsigned char*>
+ (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
+ strbuf, PRINT_BUFFER_LEN));
+ if (addr != next_addr) {
+ ALOGD("** %p: %s", addr, strbuf);
+ } else { // check whether this is nop padding
+ if (addr[0] == 0x90) {
+ ALOGD("** %p: NOP (1 byte)", addr);
+ next_addr += 1;
+ } else if (addr[0] == 0x66 && addr[1] == 0x90) {
+ ALOGD("** %p: NOP (2 bytes)", addr);
+ next_addr += 2;
+ } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
+ ALOGD("** %p: NOP (3 bytes)", addr);
+ next_addr += 3;
+ } else {
+ ALOGD("** unable to decode binary at %p", addr);
+ break;
+ }
+ }
+ addr = next_addr;
+ }
+}
+
+/* 4 is the number of additional bytes needed for chaining information for trace:
+ * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */
+#define EXTRA_BYTES_FOR_CHAINING 4
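+/*
+ * Resulting layout of a translated trace in the code cache, as emitted by
+ * dvmCompilerMIR2LIR below and consumed by dvmJitChain/dvmJitUnchain:
+ *
+ *   streamStart                       (up to 15 bytes of alignment slack)
+ *   codeAddr - 4:  u2                 offset of the chaining cell counts
+ *   codeAddr - 2:  u2                 offset of the first chaining cell
+ *   codeAddr:                         translated code, 16-byte aligned (baseAddr)
+ *   codeAddr + chainCellOffset:       chaining cells, emitted per type
+ *                                     (padding to 4-byte alignment)
+ *   codeAddr + chainCellCountOffset:  ChainCellCounts
+ */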
+
+/* Entry function to invoke the backend of the JIT compiler */
+void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
+{
+ dump_x86_inst = cUnit->printMe;
+ /* Used to hold the labels of each block */
+ LowOpBlockLabel *labelList =
+ (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
+ LowOp *headLIR = NULL;
+ GrowableList chainingListByType[kChainingCellLast];
+ unsigned int i, padding;
+
+ /*
+     * Initialize the chaining cell lists for the various cell types.
+ */
+ for (i = 0; i < kChainingCellLast; i++) {
+ dvmInitGrowableList(&chainingListByType[i], 2);
+ }
+
+ /* Clear the visited flag for each block */
+ dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
+ kAllNodes, false /* isIterative */);
+
+ GrowableListIterator iterator;
+ dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
+
+ /* Traces start with a profiling entry point. Generate it here */
+ cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
+
+ //BasicBlock **blockList = cUnit->blockList;
+ GrowableList *blockList = &cUnit->blockList;
+ BasicBlock *bb;
+
+ info->codeAddress = NULL;
+ stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
+
+ // TODO: compile into a temporary buffer and then copy into the code cache.
+ // That would let us leave the code cache unprotected for a shorter time.
+ size_t unprotected_code_cache_bytes =
+ gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
+ UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
+
+ streamStart = stream; /* trace start before alignment */
+ stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
+ stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */
+ streamMethodStart = stream; /* code start */
+ for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
+ labelList[i].lop.generic.offset = -1;
+ }
+ cUnit->exceptionBlockId = -1;
+ for (i = 0; i < blockList->numUsed; i++) {
+ bb = (BasicBlock *) blockList->elemList[i];
+ if(bb->blockType == kExceptionHandling)
+ cUnit->exceptionBlockId = i;
+ }
+ startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
+ if(gDvm.executionMode == kExecutionModeNcgO1) {
+ //merge blocks ending with "goto" with the fall through block
+ if (cUnit->jitMode != kJitLoop)
+ for (i = 0; i < blockList->numUsed; i++) {
+ bb = (BasicBlock *) blockList->elemList[i];
+ bool merged = mergeBlock(bb);
+ while(merged) merged = mergeBlock(bb);
+ }
+ for (i = 0; i < blockList->numUsed; i++) {
+ bb = (BasicBlock *) blockList->elemList[i];
+ if(bb->blockType == kDalvikByteCode &&
+ bb->firstMIRInsn != NULL) {
+ preprocessingBB(bb);
+ }
+ }
+ preprocessingTrace();
+ }
+
+ /* Handle the content in each basic block */
+ for (i = 0; ; i++) {
+ MIR *mir;
+ bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
+ if (bb == NULL) break;
+ if (bb->visited == true) continue;
+
+ labelList[i].immOpnd.value = bb->startOffset;
+
+ if (bb->blockType >= kChainingCellLast) {
+ /*
+ * Append the label pseudo LIR first. Chaining cells will be handled
+ * separately afterwards.
+ */
+ dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
+ }
+
+ if (bb->blockType == kEntryBlock) {
+ labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
+ if (bb->firstMIRInsn == NULL) {
+ continue;
+ } else {
+ setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
+ //&labelList[blockList[i]->fallThrough->id]);
+ }
+ } else if (bb->blockType == kExitBlock) {
+ labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
+ labelList[i].lop.generic.offset = (stream - streamMethodStart);
+ goto gen_fallthrough;
+ } else if (bb->blockType == kDalvikByteCode) {
+ if (bb->hidden == true) continue;
+ labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
+ /* Reset the register state */
+#if 0
+ resetRegisterScoreboard(cUnit);
+#endif
+ } else {
+ switch (bb->blockType) {
+ case kChainingCellNormal:
+ labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
+ /* handle the codegen later */
+ dvmInsertGrowableList(
+ &chainingListByType[kChainingCellNormal], i);
+ break;
+ case kChainingCellInvokeSingleton:
+ labelList[i].lop.opCode2 =
+ ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
+ labelList[i].immOpnd.value =
+ (int) bb->containingMethod;
+ /* handle the codegen later */
+ dvmInsertGrowableList(
+ &chainingListByType[kChainingCellInvokeSingleton], i);
+ break;
+ case kChainingCellInvokePredicted:
+ labelList[i].lop.opCode2 =
+ ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
+ /*
+ * Move the cached method pointer from operand 1 to 0.
+ * Operand 0 was clobbered earlier in this routine to store
+ * the block starting offset, which is not applicable to
+ * predicted chaining cell.
+ */
+ //TODO
+ //labelList[i].operands[0] = labelList[i].operands[1];
+
+ /* handle the codegen later */
+ dvmInsertGrowableList(
+ &chainingListByType[kChainingCellInvokePredicted], i);
+ break;
+ case kChainingCellHot:
+ labelList[i].lop.opCode2 =
+ ATOM_PSEUDO_CHAINING_CELL_HOT;
+ /* handle the codegen later */
+ dvmInsertGrowableList(
+ &chainingListByType[kChainingCellHot], i);
+ break;
+ case kPCReconstruction:
+ /* Make sure exception handling block is next */
+ labelList[i].lop.opCode2 =
+ ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
+ //assert (i == cUnit->numBlocks - 2);
+ labelList[i].lop.generic.offset = (stream - streamMethodStart);
+ handlePCReconstruction(cUnit,
+ &labelList[cUnit->puntBlock->id]);
+ break;
+ case kExceptionHandling:
+ labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
+ labelList[i].lop.generic.offset = (stream - streamMethodStart);
+ //if (cUnit->pcReconstructionList.numUsed) {
+ scratchRegs[0] = PhysicalReg_EAX;
+ jumpToInterpPunt();
+ //call_dvmJitToInterpPunt();
+ //}
+ break;
+ case kChainingCellBackwardBranch:
+ labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
+ /* handle the codegen later */
+ dvmInsertGrowableList(
+ &chainingListByType[kChainingCellBackwardBranch],
+ i);
+ break;
+ default:
+ break;
+ }
+ continue;
+ }
+ {
+ //LowOp *headLIR = NULL;
+ const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
+ const u2 *startCodePtr = dexCode->insns;
+ const u2 *codePtr;
+ labelList[i].lop.generic.offset = (stream - streamMethodStart);
+ ALOGV("get ready to handle JIT bb %d type %d hidden %d",
+ bb->id, bb->blockType, bb->hidden);
+ for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
+ bb = nextBB;
+ bb->visited = true;
+ cUnit->nextCodegenBlock = NULL;
+
+ if(gDvm.executionMode == kExecutionModeNcgO1 &&
+ bb->blockType != kEntryBlock &&
+ bb->firstMIRInsn != NULL) {
+ startOfBasicBlock(bb);
+ int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
+ endOfBasicBlock(bb);
+ if(cg_ret < 0) {
+ endOfTrace(true/*freeOnly*/);
+ cUnit->baseAddr = NULL;
+ ALOGI("codeGenBasicBlockJit returns negative number");
+ PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
+ return;
+ }
+ } else {
+ for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+ startOfBasicBlock(bb); //why here for O0
+ Opcode dalvikOpCode = mir->dalvikInsn.opcode;
+ if((int)dalvikOpCode >= (int)kMirOpFirst) {
+ handleExtendedMIR(cUnit, mir);
+ continue;
+ }
+ InstructionFormat dalvikFormat =
+ dexGetFormatFromOpcode(dalvikOpCode);
+ ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
+ mir->offset, dalvikOpCode, dalvikFormat);
+ LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
+ /* Remember the first LIR for this block */
+ if (headLIR == NULL) {
+ headLIR = (LowOp*)boundaryLIR;
+ }
+ bool notHandled = true;
+ /*
+ * Debugging: screen the opcode first to see if it is in the
+ * do[-not]-compile list
+ */
+ bool singleStepMe =
+ gDvmJit.includeSelectedOp !=
+ ((gDvmJit.opList[dalvikOpCode >> 3] &
+ (1 << (dalvikOpCode & 0x7))) !=
+ 0);
+ if (singleStepMe || cUnit->allSingleStep) {
+ } else {
+ codePtr = startCodePtr + mir->offset;
+ //lower each byte code, update LIR
+ notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
+ if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
+ CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
+ ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
+ gDvmJit.codeCacheFull = true;
+ cUnit->baseAddr = NULL;
+ endOfTrace(true/*freeOnly*/);
+ PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
+ return;
+ }
+ }
+ if (notHandled) {
+ ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
+ mir->offset,
+ dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
+ dalvikFormat);
+ dvmAbort();
+ break;
+ }
+ } // end for
+ } // end else //JIT + O0 code generator
+ }
+ } // end for
+ /* Eliminate redundant loads/stores and delay stores into later slots */
+#if 0
+ dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
+ cUnit->lastLIRInsn);
+#endif
+ if (headLIR) headLIR = NULL;
+gen_fallthrough:
+ /*
+ * Check if the block is terminated due to trace length constraint -
+ * insert an unconditional branch to the chaining cell.
+ */
+ if (bb->needFallThroughBranch) {
+ jumpToBasicBlock(stream, bb->fallThrough->id);
+ }
+
+ }
+
+ char* streamChainingStart = (char*)stream;
+ /* Handle the chaining cells in predefined order */
+ for (i = 0; i < kChainingCellGap; i++) {
+ size_t j;
+ int *blockIdList = (int *) chainingListByType[i].elemList;
+
+ cUnit->numChainingCells[i] = chainingListByType[i].numUsed;
+
+ /* No chaining cells of this type */
+ if (cUnit->numChainingCells[i] == 0)
+ continue;
+
+ /* Record the first LIR for a new type of chaining cell */
+ cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
+ for (j = 0; j < chainingListByType[i].numUsed; j++) {
+ int blockId = blockIdList[j];
+ BasicBlock *chainingBlock =
+ (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
+ blockId);
+
+ labelList[blockId].lop.generic.offset = (stream - streamMethodStart);
+
+ /* Align this chaining cell first */
+#if 0
+ newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
+#endif
+ /* Insert the pseudo chaining instruction */
+ dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);
+
+
+ switch (chainingBlock->blockType) {
+ case kChainingCellNormal:
+ handleNormalChainingCell(cUnit,
+ chainingBlock->startOffset, blockId, labelList);
+ break;
+ case kChainingCellInvokeSingleton:
+ handleInvokeSingletonChainingCell(cUnit,
+ chainingBlock->containingMethod, blockId, labelList);
+ break;
+ case kChainingCellInvokePredicted:
+ handleInvokePredictedChainingCell(cUnit, blockId);
+ break;
+ case kChainingCellHot:
+ handleHotChainingCell(cUnit,
+ chainingBlock->startOffset, blockId, labelList);
+ break;
+ case kChainingCellBackwardBranch:
+ handleBackwardBranchChainingCell(cUnit,
+ chainingBlock->startOffset, blockId, labelList);
+ break;
+ default:
+ ALOGE("Bad blocktype %d", chainingBlock->blockType);
+ dvmAbort();
+ break;
+ }
+
+ if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
+ ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
+ gDvmJit.codeCacheFull = true;
+ cUnit->baseAddr = NULL;
+ endOfTrace(true); /* need to free structures */
+ PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
+ return;
+ }
+ }
+ }
+#if 0
+ dvmCompilerApplyGlobalOptimizations(cUnit);
+#endif
+ endOfTrace(false);
+
+ if (gDvmJit.codeCacheFull) {
+        /* We hit the code cache size limit inside endOfTrace(false).
+ * Bail out for this trace!
+ */
+ ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
+ cUnit->baseAddr = NULL;
+ PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
+ return;
+ }
+
+ /* dump section for chaining cell counts, make sure it is 4-byte aligned */
+ padding = (4 - ((u4)stream & 3)) & 3;
+ stream += padding;
+ ChainCellCounts chainCellCounts;
+ /* Install the chaining cell counts */
+ for (i=0; i< kChainingCellGap; i++) {
+ chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
+ }
+ char* streamCountStart = (char*)stream;
+ memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
+ stream += sizeof(chainCellCounts);
+
+ cUnit->baseAddr = streamMethodStart;
+ cUnit->totalSize = (stream - streamStart);
+ if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
+ ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
+ gDvmJit.codeCacheFull = true;
+ cUnit->baseAddr = NULL;
+ PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
+ return;
+ }
+
+ /* write chaining cell count offset & chaining cell offset */
+ u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
+ *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
+ pOffset[1] = streamChainingStart - streamMethodStart;
+
+ PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
+
+ gDvmJit.codeCacheByteUsed += (stream - streamStart);
+ if (cUnit->printMe) {
+ unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
+ unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
+ ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
+ cUnit->method->clazz->descriptor, cUnit->method->name,
+ codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
+ ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
+ cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
+ printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
+ }
+ ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
+ (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
+ cUnit->totalSize, gDvmJit.codeCache);
+
+ gDvmJit.numCompilations++;
+
+ info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
+}
+
+/*
+ * Perform translation chain operation.
+ */
+void* dvmJitChain(void* tgtAddr, u4* branchAddr)
+{
+#ifdef JIT_CHAIN
+ int relOffset = (int) tgtAddr - (int)branchAddr;
+
+ if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
+ (gDvmJit.codeCacheFull == false)) {
+
+ gDvmJit.translationChains++;
+
+ //OpndSize immSize = estOpndSizeFromImm(relOffset);
+ //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
+        /* Hard-code the jump operand size to 32 bits. This instruction will
+         * replace the "jump 0" in the original code sequence.
+ */
+ OpndSize immSize = OpndSize_32;
+ relOffset -= 5;
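+        /* 5 = 1-byte jmp opcode + 4-byte rel32 operand; the displacement is
+         * relative to the end of the patched instruction. */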
+ //can't use stream here since it is used by the compilation thread
+ UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
+ dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
+ PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
+
+ gDvmJit.hasNewChain = true;
+
+ COMPILER_TRACE_CHAINING(
+ ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
+ (int) branchAddr, tgtAddr, relOffset));
+ }
+#endif
+ return tgtAddr;
+}
+
+/*
+ * Accept the work and start compiling. Returns true if compilation
+ * is attempted.
+ */
+bool dvmCompilerDoWork(CompilerWorkOrder *work)
+{
+ JitTraceDescription *desc;
+ bool isCompile;
+ bool success = true;
+
+ if (gDvmJit.codeCacheFull) {
+ return false;
+ }
+
+ switch (work->kind) {
+ case kWorkOrderTrace:
+ isCompile = true;
+ /* Start compilation with maximally allowed trace length */
+ desc = (JitTraceDescription *)work->info;
+ success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
+ work->bailPtr, 0 /* no hints */);
+ break;
+ case kWorkOrderTraceDebug: {
+ bool oldPrintMe = gDvmJit.printMe;
+ gDvmJit.printMe = true;
+ isCompile = true;
+ /* Start compilation with maximally allowed trace length */
+ desc = (JitTraceDescription *)work->info;
+ success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
+ work->bailPtr, 0 /* no hints */);
+ gDvmJit.printMe = oldPrintMe;
+ break;
+ }
+ case kWorkOrderProfileMode:
+ dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
+ isCompile = false;
+ break;
+ default:
+ isCompile = false;
+ ALOGE("Jit: unknown work order type");
+ assert(0); // Bail if debug build, discard otherwise
+ }
+ if (!success)
+ work->result.codeAddress = NULL;
+ return isCompile;
+}
+
+void dvmCompilerCacheFlush(long start, long end, long flags) {
+ /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
+}
+
+//#endif