author | Steve Kondik <shade@chemlab.org> | 2013-11-11 00:32:52 -0800
committer | Steve Kondik <shade@chemlab.org> | 2013-11-11 00:32:52 -0800
commit | bab417cc2aceee45238d5648975118bf3dd4c2e9 (patch)
tree | 39f1867dee9fe25cf7174917ef39ea3dd361fca4 /vm
parent | 5531b23c1546fdf896db25f7412291bada6e723c (diff)
parent | e17852495a15ddad079305c725d067ac95e4d655 (diff)
Merge branch 'kk_2.7_rb1.9' of git://codeaurora.org/platform/dalvik into caf
Change-Id: I885fab2470352d0a625c9946d0d5c9111486b713
Diffstat (limited to 'vm')
50 files changed, 2460 insertions, 189 deletions
diff --git a/vm/Android.mk b/vm/Android.mk index e5d5448ce..8cca3f39b 100644 --- a/vm/Android.mk +++ b/vm/Android.mk @@ -39,6 +39,16 @@ else endif host_smp_flag := -DANDROID_SMP=1 +ifeq ($(ARCH_ARM_HAVE_ARMV7A),true) + target_inline_arg5_flag := -DINLINE_ARG_EXPANDED + host_inline_arg5_flag := -DINLINE_ARG_EXPANDED +else + target_inline_arg5_flag := + host_inline_arg5_flag := +endif + + + # Build the installed version (libdvm.so) first WITH_JIT := true include $(LOCAL_PATH)/ReconfigureDvm.mk @@ -55,9 +65,9 @@ ifneq ($(strip $(WITH_ADDRESS_SANITIZER)),) LOCAL_CFLAGS := $(filter-out $(CLANG_CONFIG_UNKNOWN_CFLAGS),$(LOCAL_CFLAGS)) endif +LOCAL_CFLAGS += $(target_inline_arg5_flag) # TODO: split out the asflags. LOCAL_ASFLAGS := $(LOCAL_CFLAGS) - include $(BUILD_SHARED_LIBRARY) # Derivation #1 @@ -65,6 +75,7 @@ include $(BUILD_SHARED_LIBRARY) include $(LOCAL_PATH)/ReconfigureDvm.mk LOCAL_CFLAGS += -UNDEBUG -DDEBUG=1 -DLOG_NDEBUG=1 -DWITH_DALVIK_ASSERT \ -DWITH_JIT_TUNING $(target_smp_flag) +LOCAL_CFLAGS += $(target_inline_arg5_flag) # TODO: split out the asflags. LOCAL_ASFLAGS := $(LOCAL_CFLAGS) LOCAL_MODULE := libdvm_assert @@ -77,6 +88,7 @@ ifneq ($(dvm_arch),mips) # MIPS support for self-verification is incomplete include $(LOCAL_PATH)/ReconfigureDvm.mk LOCAL_CFLAGS += -UNDEBUG -DDEBUG=1 -DLOG_NDEBUG=1 -DWITH_DALVIK_ASSERT \ -DWITH_SELF_VERIFICATION $(target_smp_flag) + LOCAL_CFLAGS += $(target_inline_arg5_flag) # TODO: split out the asflags. LOCAL_ASFLAGS := $(LOCAL_CFLAGS) LOCAL_MODULE := libdvm_sv @@ -135,6 +147,7 @@ ifeq ($(WITH_HOST_DALVIK),true) endif LOCAL_CFLAGS += $(host_smp_flag) + LOCAL_CFLAGS += $(host_inline_arg5_flag) # TODO: split out the asflags. LOCAL_ASFLAGS := $(LOCAL_CFLAGS) LOCAL_MODULE_TAGS := optional diff --git a/vm/DalvikCrashDump.cpp b/vm/DalvikCrashDump.cpp new file mode 100644 index 000000000..c940d3ebf --- /dev/null +++ b/vm/DalvikCrashDump.cpp @@ -0,0 +1,31 @@ +/** + * Copyright (c) 2012, The Linux Foundation. All rights reserved. + * Not a Contribution, Apache license notifications and license are retained + * for attribution purposes only. + * + * Copyright (c) 2005-2008, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * Support files for dump Dalvik info during crash from debuggerd + **/ + +#include <Dalvik.h> +#include "DalvikCrashDump.h" + +/* Add hook to dump dalvik information */ +__attribute__ ((weak)) +void dump_dalvik(ptrace_context_t* context, log_t* log, pid_t tid, bool at_fault) +{ + ALOGE("[Dalvik] No information available \n"); +} diff --git a/vm/DalvikCrashDump.h b/vm/DalvikCrashDump.h new file mode 100644 index 000000000..633179448 --- /dev/null +++ b/vm/DalvikCrashDump.h @@ -0,0 +1,54 @@ +/** + * Copyright (c) 2012, The Linux Foundation. All rights reserved. 
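Aside on the DalvikCrashDump.cpp hunk above: the patch gives libdvm a default `dump_dalvik` marked `__attribute__((weak))`, so debuggerd always has something to call, while a vendor library can link a strong definition that transparently replaces the no-op. A minimal, self-contained sketch of that linkage pattern (the function name and signature below are illustrative stand-ins, not the real debuggerd interface):

```cpp
// weak_hook.cpp -- illustrative only; not the real ptrace_context_t/log_t types.
#include <cstdio>

// Weak default: the linker keeps this definition only if no strong
// definition of the same symbol exists anywhere in the final image.
extern "C" __attribute__((weak)) void dump_dalvik_info(int tid) {
    std::printf("[Dalvik] No information available for tid %d\n", tid);
}

int main() {
    // Calls the weak no-op here; a vendor library that defines a strong
    // dump_dalvik_info(int) would be called instead, with no code change.
    dump_dalvik_info(1234);
    return 0;
}
```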
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met, + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Hooks for debuggerd to call into libdvm during a crash to dump + * dalvik related information + **/ + +#ifndef DALVIK_CRASH_DUMP_H +#define DALVIK_CRASH_DUMP_H + +#include <corkscrew/ptrace.h> + +#ifdef HAS_LIBDVM +# include <utility.h> +#else +# include <../system/core/debuggerd/utility.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +void dump_dalvik (ptrace_context_t* context, log_t* log, pid_t tid, bool at_fault); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/vm/DalvikVersion.h b/vm/DalvikVersion.h index e71c8393c..da67439c4 100644 --- a/vm/DalvikVersion.h +++ b/vm/DalvikVersion.h @@ -32,6 +32,6 @@ * way classes load changes, e.g. field ordering or vtable layout. Changing * this guarantees that the optimized form of the DEX file is regenerated. */ -#define DALVIK_VM_BUILD 27 +#define DALVIK_VM_BUILD 28 /* Increment for device extension */ #endif // DALVIK_VERSION_H_ @@ -239,6 +239,7 @@ ifeq ($(dvm_arch),arm) #LOCAL_CFLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=vfp LOCAL_CFLAGS += -Werror MTERP_ARCH_KNOWN := true + # Select architecture-specific sources (armv5te, armv7-a, etc.) 
LOCAL_SRC_FILES += \ arch/arm/CallOldABI.S \ @@ -248,6 +249,9 @@ ifeq ($(dvm_arch),arm) mterp/out/InterpAsm-$(dvm_arch_variant).S ifeq ($(WITH_JIT),true) + # Debuggerd support + LOCAL_SRC_FILES += DalvikCrashDump.cpp + LOCAL_SRC_FILES += \ compiler/codegen/RallocUtil.cpp \ compiler/codegen/arm/$(dvm_arch_variant)/Codegen.cpp \ @@ -259,6 +263,12 @@ ifeq ($(dvm_arch),arm) compiler/codegen/arm/ArmRallocUtil.cpp \ compiler/template/out/CompilerTemplateAsm-$(dvm_arch_variant).S endif + + ifeq ($(WITH_QC_PERF),true) + LOCAL_WHOLE_STATIC_LIBRARIES += libqc-dalvik + LOCAL_SHARED_LIBRARIES += libqc-opt + LOCAL_CFLAGS += -DWITH_QC_PERF + endif endif ifeq ($(dvm_arch),mips) diff --git a/vm/Globals.h b/vm/Globals.h index 29f7356ad..bfd2b7e12 100644 --- a/vm/Globals.h +++ b/vm/Globals.h @@ -312,6 +312,7 @@ struct DvmGlobals { ClassObject* exNoSuchFieldException; ClassObject* exNoSuchMethodError; ClassObject* exNullPointerException; + ClassObject* exNumberFormatException; ClassObject* exOutOfMemoryError; ClassObject* exRuntimeException; ClassObject* exStackOverflowError; diff --git a/vm/Init.cpp b/vm/Init.cpp index f1762c96c..1a99e12ab 100644 --- a/vm/Init.cpp +++ b/vm/Init.cpp @@ -1374,6 +1374,13 @@ private: }; /* + * Hook for post-init functions + */ +__attribute__((weak)) void dvmPostInitZygote(void) { + ; +} + +/* * VM initialization. Pass in any options provided on the command line. * Do not pass in the class name or the options for the class. * @@ -1572,6 +1579,7 @@ std::string dvmStartup(int argc, const char* const argv[], if (!initZygote()) { return "initZygote failed"; } + dvmPostInitZygote(); } else { if (!dvmInitAfterZygote()) { return "dvmInitAfterZygote failed"; diff --git a/vm/InitRefs.cpp b/vm/InitRefs.cpp index 08c28f856..06f99e7a9 100644 --- a/vm/InitRefs.cpp +++ b/vm/InitRefs.cpp @@ -104,6 +104,7 @@ static bool initClassReferences() { { &gDvm.exNoSuchFieldException, "Ljava/lang/NoSuchFieldException;" }, { &gDvm.exNoSuchMethodError, "Ljava/lang/NoSuchMethodError;" }, { &gDvm.exNullPointerException, "Ljava/lang/NullPointerException;" }, + { &gDvm.exNumberFormatException, "Ljava/lang/NumberFormatException;" }, { &gDvm.exOutOfMemoryError, "Ljava/lang/OutOfMemoryError;" }, { &gDvm.exRuntimeException, "Ljava/lang/RuntimeException;" }, { &gDvm.exStackOverflowError, "Ljava/lang/StackOverflowError;" }, @@ -477,6 +478,10 @@ static bool verifyStringOffsets() { return ok; } +__attribute__((weak)) bool verifyExtra(){ + return true; +} + /* (documented in header) */ bool dvmFindRequiredClassesAndMembers() { /* @@ -491,7 +496,8 @@ bool dvmFindRequiredClassesAndMembers() { && initDirectMethodReferences() && initVirtualMethodOffsets() && initFinalizerReference() - && verifyStringOffsets(); + && verifyStringOffsets() + && verifyExtra(); } /* (documented in header) */ diff --git a/vm/InlineNative.cpp b/vm/InlineNative.cpp index 00c1e9554..97f4d3964 100644 --- a/vm/InlineNative.cpp +++ b/vm/InlineNative.cpp @@ -913,3 +913,40 @@ bool dvmPerformInlineOp4Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, TRACE_METHOD_EXIT(self, method); return result; } + +#ifdef INLINE_ARG_EXPANDED +bool dvmPerformInlineOp5Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4) +{ + Method* method = dvmResolveInlineNative(opIndex); + if (method == NULL) { + return ((InlineOp5Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, + pResult, arg4); + } + + Thread* self = dvmThreadSelf(); + TRACE_METHOD_ENTER(self, method); + bool result = ((InlineOp5Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, 
arg1, arg2, arg3, + pResult, arg4); + TRACE_METHOD_EXIT(self, method); + return result; +} + +bool dvmPerformInlineOp7Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4, u4 arg5, u4 arg6) +{ + Method* method = dvmResolveInlineNative(opIndex); + if (method == NULL) { + return ((InlineOp7Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, + pResult, arg4, arg5, arg6); + } + + Thread* self = dvmThreadSelf(); + TRACE_METHOD_ENTER(self, method); + bool result = ((InlineOp7Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, + pResult, arg4, arg5, arg6); + TRACE_METHOD_EXIT(self, method); + return result; +} +#endif + diff --git a/vm/InlineNative.h b/vm/InlineNative.h index fe14f8bf8..5d3dd13ec 100644 --- a/vm/InlineNative.h +++ b/vm/InlineNative.h @@ -33,6 +33,12 @@ Method* dvmFindInlinableMethod(const char* classDescriptor, typedef bool (*InlineOp4Func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult); +#ifdef INLINE_ARG_EXPANDED +typedef bool (*InlineOp5Func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, u4 arg4); +typedef bool (*InlineOp7Func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, u4 arg4, u4 arg5, u4 arg6); +#endif /* * Table of inline operations. * @@ -117,12 +123,45 @@ INLINE bool dvmPerformInlineOp4Std(u4 arg0, u4 arg1, u4 arg2, u4 arg3, return (*gDvmInlineOpsTable[opIndex].func)(arg0, arg1, arg2, arg3, pResult); } +#ifdef INLINE_ARG_EXPANDED +/* + * Perform the operation specified by "opIndex". + * + * We want the arguments to appear in the first 4 registers so they can + * be passed straight through to the handler function. Ideally on ARM + * they'll go into r0-r3 and stay there. + * + * Returns "true" if everything went normally, "false" if an exception + * was thrown. + */ +INLINE bool dvmPerformInlineOp5Std(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4) +{ + return ((InlineOp5Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, pResult, arg4); +} + +INLINE bool dvmPerformInlineOp7Std(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4, u4 arg5, u4 arg6) +{ + return ((InlineOp7Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, pResult, arg4, arg5, arg6); +} + +#endif /* * Like the "std" version, but will emit profiling info. */ bool dvmPerformInlineOp4Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult, int opIndex); +#ifdef INLINE_ARG_EXPANDED +/* + * Like the "std" version, but will emit profiling info. + */ +bool dvmPerformInlineOp5Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4); +bool dvmPerformInlineOp7Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4, u4 arg5, u4 arg6); +#endif /* * Return method & populate the table on first use. */ @@ -38,6 +38,8 @@ #define LW_HASH_STATE_MASK 0x3 #define LW_HASH_STATE_SHIFT 1 #define LW_HASH_STATE(x) (((x) >> LW_HASH_STATE_SHIFT) & LW_HASH_STATE_MASK) +#define LW_HASH_STATE_SIZE 2 +#define LW_HASH_STATE_ABS_MASK 0x6 /* * Monitor accessor. 
Extracts a monitor structure pointer from a fat diff --git a/vm/alloc/Heap.cpp b/vm/alloc/Heap.cpp index c11ff79dd..b108f90a2 100644 --- a/vm/alloc/Heap.cpp +++ b/vm/alloc/Heap.cpp @@ -29,16 +29,17 @@ #include "alloc/HeapSource.h" #include "alloc/MarkSweep.h" #include "os/os.h" - #include <sys/mman.h> +#include "hprof/Hprof.h" #include <sys/resource.h> #include <sys/time.h> #include <limits.h> #include <errno.h> - #include <cutils/trace.h> -#ifdef __BIONIC__ -#include <cutils/properties.h> +#include <cutils/process_name.h> + +#ifdef HAVE_ANDROID_OS +#include "cutils/properties.h" static int debugalloc() { @@ -196,8 +197,13 @@ static void gcForMalloc(bool clearSoftReferences) */ static void *tryMalloc(size_t size) { +#ifdef HAVE_ANDROID_OS + char prop_value[PROPERTY_VALUE_MAX] = {'\0'}; +#endif + char* hprof_file = NULL; void *ptr; - + int result = -1; + int debug_oom = 0; //TODO: figure out better heuristics // There will be a lot of churn if someone allocates a bunch of // big objects in a row, and we hit the frag case each time. @@ -212,7 +218,6 @@ static void *tryMalloc(size_t size) if (ptr != NULL) { return ptr; } - /* * The allocation failed. If the GC is running, block until it * completes and retry. @@ -252,7 +257,6 @@ static void *tryMalloc(size_t size) FRACTIONAL_MB(newHeapSize), size); return ptr; } - /* Most allocations should have succeeded by now, so the heap * is really full, really fragmented, or the requested size is * really big. Do another GC, collecting SoftReferences this @@ -273,6 +277,46 @@ static void *tryMalloc(size_t size) //TODO: tell the HeapSource to dump its state dvmDumpThread(dvmThreadSelf(), false); +#ifdef HAVE_ANDROID_OS + /* Read the property to check whether hprof should be generated or not */ + property_get("dalvik.debug.oom",prop_value,"0"); + debug_oom = atoi(prop_value); +#endif + if(debug_oom == 1) { + LOGE_HEAP("Generating hprof for process: %s PID: %d", + get_process_name(),getpid()); + dvmUnlockHeap(); + + /* allocate memory for hprof file name. Allocate approx 30 bytes. + * 11 byte for directory path, 10 bytes for pid, 6 bytes for + * extension + "\0'. + */ + hprof_file = (char*) malloc (sizeof(char) * 30); + + /* creation of hprof will fail if /data/misc permission is not set + * to 0777. + */ + + if(hprof_file) { + snprintf(hprof_file,30,"/data/misc/%d.hprof",getpid()); + LOGE_HEAP("Generating hprof in file: %s",hprof_file ); + + result = hprofDumpHeap(hprof_file, -1, false); + free(hprof_file); + } else { + LOGE_HEAP("Failed to allocate memory for file name." + "Generating hprof in default file: /data/misc/app_oom.hprof"); + result = hprofDumpHeap("/data/misc/app_oom.hprof", -1, false); + } + dvmLockMutex(&gDvm.gcHeapLock); + + if (result != 0) { + /* ideally we'd throw something more specific based on actual failure */ + dvmThrowRuntimeException( + "Failure during heap dump; check log output for details"); + LOGE_HEAP(" hprofDumpHeap failed with result: %d ",result); + } + } return NULL; } diff --git a/vm/analysis/Optimize.cpp b/vm/analysis/Optimize.cpp index b61b82c18..3e955cbbb 100644 --- a/vm/analysis/Optimize.cpp +++ b/vm/analysis/Optimize.cpp @@ -50,7 +50,6 @@ static bool rewriteExecuteInlineRange(Method* method, u2* insns, static void rewriteReturnVoid(Method* method, u2* insns); static bool needsReturnBarrier(Method* method); - /* * Create a table of inline substitutions. Sets gDvm.inlineSubs. 
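The Heap.cpp hunk above makes `tryMalloc()` emit an hprof snapshot on OOM when the `dalvik.debug.oom` system property is set to 1, naming the file after the failing process's pid. A compressed sketch of the gating and file-naming logic; `property_get` and `hprofDumpHeap` are the real Android APIs, and the stub below is an assumption added only so the sketch runs standalone:

```cpp
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <unistd.h>

// Stand-in for libcutils property_get(); here it just returns the default.
static int fake_property_get(const char* key, char* value, const char* def) {
    (void)key;
    std::strcpy(value, def);
    return (int)std::strlen(value);
}

static void maybeDumpHprofOnOom() {
    char prop[32] = {'\0'};
    fake_property_get("dalvik.debug.oom", prop, "0");
    if (std::atoi(prop) != 1)
        return;  // feature disabled: fall through to the normal OOM path

    // "/data/misc/" (11) + pid digits (<=10) + ".hprof" + NUL fits in 30,
    // matching the patch's 30-byte allocation.
    char path[30];
    std::snprintf(path, sizeof(path), "/data/misc/%d.hprof", (int)getpid());
    std::printf("would dump hprof to %s\n", path);  // real code: hprofDumpHeap(path, -1, false)
}

int main() { maybeDumpHprofOnOom(); return 0; }
```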
* @@ -968,6 +967,8 @@ static bool rewriteInvokeObjectInit(Method* method, u2* insns) LOGVV("DexOpt: replaced Object.<init> in %s.%s", method->clazz->descriptor, method->name); + }else{ + return false; } return true; diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h index 73efad834..a97ccb10f 100644 --- a/vm/compiler/CompilerIR.h +++ b/vm/compiler/CompilerIR.h @@ -76,6 +76,7 @@ typedef struct ChainCellCounts { u1 count[kChainingCellLast]; /* include one more space for the gap # */ u4 dummyForAlignment; } u; + u4 extraSize; } ChainCellCounts; typedef struct LIR { @@ -175,6 +176,8 @@ typedef struct BasicBlock { BlockListType blockListType; // switch and exception handling GrowableList blocks; } successorBlockList; + + LIR *blockLabelLIR; } BasicBlock; /* @@ -213,6 +216,7 @@ typedef struct CompilationUnit { int numClassPointers; LIR *chainCellOffsetLIR; GrowableList pcReconstructionList; + GrowableList pcReconstructionListExtended; int headerSize; // bytes before the first code ptr int dataOffset; // starting offset of literal pool int totalSize; // header + code size @@ -228,9 +232,11 @@ typedef struct CompilationUnit { bool heapMemOp; // Mark mem ops for self verification bool usesLinkRegister; // For self-verification only int profileCodeSize; // Size of the profile prefix in bytes + GrowableList chainingListByType[kChainingCellGap]; int numChainingCells[kChainingCellGap]; LIR *firstChainingLIR[kChainingCellGap]; LIR *chainingCellBottom; + int chainingCellExtraSize; struct RegisterPool *regPool; int optRound; // round number to tell an LIR's age jmp_buf *bailPtr; @@ -248,6 +254,7 @@ typedef struct CompilationUnit { /* Data structure for loop analysis and optimizations */ struct LoopAnalysis *loopAnalysis; + bool hasHoistedChecks; /* Map SSA names to location */ RegLocation *regLocation; @@ -278,6 +285,12 @@ typedef struct CompilationUnit { bool printSSANames; void *blockLabelList; bool quitLoopMode; // cold path/complex bytecode + void *labelList; + bool setCCode; // gen instruction that sets ccodes + // the flag must be set before calling + // codegen function and reset upon completion + + void *extraData; // placeholder } CompilationUnit; #if defined(WITH_SELF_VERIFICATION) diff --git a/vm/compiler/Dataflow.cpp b/vm/compiler/Dataflow.cpp index 7bed8396a..e60931489 100644 --- a/vm/compiler/Dataflow.cpp +++ b/vm/compiler/Dataflow.cpp @@ -814,6 +814,22 @@ int dvmConvertSSARegToDalvik(const CompilationUnit *cUnit, int ssaReg) } /* + * Utility function to populate attributes based on the DEX opcode + */ +__attribute__((weak)) int dvmGetDexOptAttributes(const DecodedInstruction* instr) +{ + int result = 0; + if (instr) { + Opcode opcode = instr->opcode; + if ((opcode >= OP_NOP) && (opcode < (Opcode)kMirOpLast)) { + result = dvmCompilerDataFlowAttributes[opcode]; + } + } + + return result; +} + +/* * Utility function to convert encoded SSA register value into Dalvik register * and subscript pair. Each SSA register can be used to index the * ssaToDalvikMap list to get the subscript[31..16]/dalvik_reg[15..0] mapping. 
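The weak `dvmGetDexOptAttributes()` introduced above replaces raw `dvmCompilerDataFlowAttributes[opcode]` indexing throughout the compiler with a range-checked accessor, so extended opcodes added by a vendor backend cannot index past the base table. The shape of that guard, with toy names standing in for the real `Opcode`/`kMirOpLast` enums:

```cpp
// Toy enum/table; the real code uses Opcode, kMirOpLast and
// dvmCompilerDataFlowAttributes[].
enum ToyOpcode { TOY_NOP = 0, TOY_MOVE, kToyLastKnown };

static const int kDataFlowAttrs[kToyLastKnown] = { 0x0, 0x1 };

__attribute__((weak)) int getDexOptAttributes(int opcode) {
    // In-range opcodes use the base table; anything else (e.g. a vendor
    // extension opcode) safely reports "no attributes" unless a strong
    // override of this weak function knows better.
    if (opcode >= TOY_NOP && opcode < kToyLastKnown)
        return kDataFlowAttrs[opcode];
    return 0;
}

int main() { return getDexOptAttributes(TOY_MOVE); }
```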
@@ -823,9 +839,12 @@ char *dvmCompilerGetDalvikDisassembly(const DecodedInstruction *insn, { char buffer[256]; Opcode opcode = insn->opcode; - int dfAttributes = dvmCompilerDataFlowAttributes[opcode]; + int dfAttributes = 0; int flags; char *ret; + if (insn) { + dfAttributes = dvmGetDexOptAttributes(insn); + } buffer[0] = 0; if ((int)opcode >= (int)kMirOpFirst) { @@ -925,11 +944,14 @@ char *dvmCompilerFullDisassembler(const CompilationUnit *cUnit, char operand0[256], operand1[256]; const DecodedInstruction *insn = &mir->dalvikInsn; int opcode = insn->opcode; - int dfAttributes = dvmCompilerDataFlowAttributes[opcode]; + int dfAttributes = 0; char *ret; int length; OpcodeFlags flags; + if (insn) { + dvmGetDexOptAttributes(insn); + } buffer[0] = 0; if (opcode >= kMirOpFirst) { if (opcode == kMirOpPhi) { @@ -1121,7 +1143,7 @@ bool dvmCompilerFindLocalLiveIn(CompilationUnit *cUnit, BasicBlock *bb) for (mir = bb->firstMIRInsn; mir; mir = mir->next) { int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); DecodedInstruction *dInsn = &mir->dalvikInsn; if (dfAttributes & DF_HAS_USES) { @@ -1211,6 +1233,16 @@ static void dataFlowSSAFormat3RC(CompilationUnit *cUnit, MIR *mir) } } +/* check for invoke instructions in the loop trace */ +__attribute__((weak)) void dvmCompilerCheckMIR(CompilationUnit *cUnit, MIR *mir) +{ + int flags = dexGetFlagsFromOpcode(mir->dalvikInsn.opcode); + + if (flags & kInstrInvoke) { + cUnit->hasInvoke = true; + } +} + /* Entry function to convert a block into SSA representation */ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) { @@ -1219,11 +1251,14 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) if (bb->dataFlowInfo == NULL) return false; for (mir = bb->firstMIRInsn; mir; mir = mir->next) { + + dvmCompilerCheckMIR(cUnit, mir); + mir->ssaRep = (struct SSARepresentation *) dvmCompilerNew(sizeof(SSARepresentation), true); int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); int numUses = 0; @@ -1261,6 +1296,8 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) false); mir->ssaRep->fpUse = (bool *)dvmCompilerNew(sizeof(bool) * numUses, false); + mir->ssaRep->wideUse = (bool *)dvmCompilerNew(sizeof(bool) * numUses, + false); } int numDefs = 0; @@ -1278,6 +1315,8 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) false); mir->ssaRep->fpDef = (bool *)dvmCompilerNew(sizeof(bool) * numDefs, false); + mir->ssaRep->wideDef = (bool *)dvmCompilerNew(sizeof(bool) * numDefs, + false); } DecodedInstruction *dInsn = &mir->dalvikInsn; @@ -1286,37 +1325,48 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) numUses = 0; if (dfAttributes & DF_UA) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_A; + mir->ssaRep->wideUse[numUses] = false; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vA, numUses++); } else if (dfAttributes & DF_UA_WIDE) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_A; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vA, numUses++); mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_A; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vA+1, numUses++); } if (dfAttributes & DF_UB) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_B; + mir->ssaRep->wideUse[numUses] = false; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vB, numUses++); } else if 
(dfAttributes & DF_UB_WIDE) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_B; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vB, numUses++); mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_B; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vB+1, numUses++); } if (dfAttributes & DF_UC) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_C; + mir->ssaRep->wideUse[numUses] = false; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC, numUses++); } else if (dfAttributes & DF_UC_WIDE) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_C; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC, numUses++); mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_C; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC+1, numUses++); } } if (dfAttributes & DF_HAS_DEFS) { mir->ssaRep->fpDef[0] = dfAttributes & DF_FP_A; + mir->ssaRep->wideDef[0] = dfAttributes & DF_DA_WIDE; handleSSADef(cUnit, mir->ssaRep->defs, dInsn->vA, 0); if (dfAttributes & DF_DA_WIDE) { mir->ssaRep->fpDef[1] = dfAttributes & DF_FP_A; + mir->ssaRep->wideDef[1] = true; handleSSADef(cUnit, mir->ssaRep->defs, dInsn->vA+1, 1); } } @@ -1339,7 +1389,7 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) /* Setup a constant value for opcodes thare have the DF_SETS_CONST attribute */ static void setConstant(CompilationUnit *cUnit, int ssaReg, int value) { - dvmSetBit(cUnit->isConstantV, ssaReg); + dvmCompilerSetBit(cUnit->isConstantV, ssaReg); cUnit->constantValues[ssaReg] = value; } @@ -1350,7 +1400,7 @@ bool dvmCompilerDoConstantPropagation(CompilationUnit *cUnit, BasicBlock *bb) for (mir = bb->firstMIRInsn; mir; mir = mir->next) { int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); DecodedInstruction *dInsn = &mir->dalvikInsn; @@ -1437,7 +1487,7 @@ bool dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit, /* Find basic induction variable first */ for (mir = bb->firstMIRInsn; mir; mir = mir->next) { int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); if (!(dfAttributes & DF_IS_LINEAR)) continue; @@ -1481,7 +1531,7 @@ bool dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit, break; } if (deltaIsConstant) { - dvmSetBit(isIndVarV, mir->ssaRep->uses[0]); + dvmCompilerSetBit(isIndVarV, mir->ssaRep->uses[0]); InductionVariableInfo *ivInfo = (InductionVariableInfo *) dvmCompilerNew(sizeof(InductionVariableInfo), false); @@ -1502,7 +1552,7 @@ bool dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit, /* Find dependent induction variable now */ for (mir = bb->firstMIRInsn; mir; mir = mir->next) { int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); if (!(dfAttributes & DF_IS_LINEAR)) continue; @@ -1554,7 +1604,7 @@ bool dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit, if (cIsConstant) { unsigned int i; - dvmSetBit(isIndVarV, mir->ssaRep->defs[0]); + dvmCompilerSetBit(isIndVarV, mir->ssaRep->defs[0]); InductionVariableInfo *ivInfo = (InductionVariableInfo *) dvmCompilerNew(sizeof(InductionVariableInfo), false); diff --git a/vm/compiler/Dataflow.h b/vm/compiler/Dataflow.h index f04c91ca8..67b14c559 100644 --- a/vm/compiler/Dataflow.h +++ b/vm/compiler/Dataflow.h @@ -97,9 +97,11 @@ typedef struct SSARepresentation { int numUses; 
int *uses; bool *fpUse; + bool *wideUse; int numDefs; int *defs; bool *fpDef; + bool *wideDef; } SSARepresentation; /* @@ -125,4 +127,6 @@ typedef struct ArrayAccessInfo { #define DECODE_REG(v) (v & 0xffff) #define DECODE_SUB(v) (((unsigned int) v) >> 16) +extern int dvmGetDexOptAttributes(const DecodedInstruction* instr); + #endif // DALVIK_VM_DATAFLOW_H_ diff --git a/vm/compiler/Frontend.cpp b/vm/compiler/Frontend.cpp index 47c1898a0..65ef1ebd9 100644 --- a/vm/compiler/Frontend.cpp +++ b/vm/compiler/Frontend.cpp @@ -219,11 +219,11 @@ static int analyzeInlineTarget(DecodedInstruction *dalvikInsn, int attributes, } if (!(flags & kInstrCanReturn)) { - if (!(dvmCompilerDataFlowAttributes[dalvikOpcode] & + if (!(dvmGetDexOptAttributes(dalvikInsn) & DF_IS_GETTER)) { attributes &= ~METHOD_IS_GETTER; } - if (!(dvmCompilerDataFlowAttributes[dalvikOpcode] & + if (!(dvmGetDexOptAttributes(dalvikInsn) & DF_IS_SETTER)) { attributes &= ~METHOD_IS_SETTER; } @@ -1355,6 +1355,41 @@ bool dvmCompileMethod(const Method *method, JitTranslationInfo *info) return false; } +/* + * Utility funtion to check the DEX opcode for correctness + */ +__attribute__((weak)) bool dvmVerifyDex(CompilationUnit *cUnit, BasicBlock *curBlock, + const u2* codePtr, MIR *insn) +{ + bool result = false; + if (insn) { + if ((insn->dalvikInsn.opcode >= OP_NOP) && + (insn->dalvikInsn.opcode < OP_UNUSED_FF)) { + result = true; + } + } + return result; +} + +/* dump simple trace property */ +__attribute__((weak)) void dvmDumpLoopTraceStats(CompilationUnit *cUnit) +{ + if(cUnit->printMe){ + ALOGV("hasInvoke %d",cUnit->hasInvoke); + } +} + +/* dump reglocation info of a loop trace */ +__attribute__((weak)) void dvmCompilerDumpRegLocationInfo(CompilationUnit *cUnit) +{ + if(cUnit->printMe){ + int i; + for (i=0; i< cUnit->numSSARegs; i++) { + ALOGV("LOC %d:%d",i,cUnit->regLocation[i].sRegLow); + } + } +} + /* Extending the trace by crawling the code from curBlock */ static bool exhaustTrace(CompilationUnit *cUnit, BasicBlock *curBlock) { @@ -1392,6 +1427,7 @@ static bool exhaustTrace(CompilationUnit *cUnit, BasicBlock *curBlock) if (width == 0) break; + dvmVerifyDex(cUnit, curBlock, codePtr + width, insn); dvmCompilerAppendMIR(curBlock, insn); codePtr += width; @@ -1464,11 +1500,15 @@ static bool compileLoop(CompilationUnit *cUnit, unsigned int startOffset, cUnit->jitMode = kJitLoop; + /* reset number of insns in the trace */ + cUnit->numInsts=0; + /* Initialize the block list */ dvmInitGrowableList(&cUnit->blockList, 4); /* Initialize the PC reconstruction list */ dvmInitGrowableList(&cUnit->pcReconstructionList, 8); + dvmInitGrowableList(&cUnit->pcReconstructionListExtended, 1); /* Create the default entry and exit blocks and enter them to the list */ BasicBlock *entryBlock = dvmCompilerNewBB(kEntryBlock, numBlocks++); @@ -1530,6 +1570,8 @@ static bool compileLoop(CompilationUnit *cUnit, unsigned int startOffset, if (!dvmCompilerBuildLoop(cUnit)) goto bail; + dvmDumpLoopTraceStats(cUnit); + dvmCompilerLoopOpt(cUnit); /* @@ -1547,6 +1589,8 @@ static bool compileLoop(CompilationUnit *cUnit, unsigned int startOffset, /* Allocate Registers using simple local allocation scheme */ dvmCompilerLocalRegAlloc(cUnit); + dvmCompilerDumpRegLocationInfo(cUnit); + /* Convert MIR to LIR, etc. 
*/ dvmCompilerMIR2LIR(cUnit); #endif @@ -1693,6 +1737,7 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts, /* Initialize the PC reconstruction list */ dvmInitGrowableList(&cUnit.pcReconstructionList, 8); + dvmInitGrowableList(&cUnit.pcReconstructionListExtended, 1); /* Initialize the basic block list */ blockList = &cUnit.blockList; dvmInitGrowableList(blockList, 8); @@ -1800,6 +1845,7 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts, curBB = dvmCompilerNewBB(kEntryBlock, numBlocks++); dvmInsertGrowableList(blockList, (intptr_t) curBB); curBB->startOffset = curOffset; + cUnit.entryBlock = curBB; entryCodeBB = dvmCompilerNewBB(kDalvikByteCode, numBlocks++); dvmInsertGrowableList(blockList, (intptr_t) entryCodeBB); @@ -1826,9 +1872,12 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts, /* The trace should never incude instruction data */ assert(width); insn->width = width; + + dvmVerifyDex(&cUnit, curBB, codePtr + width, insn); traceSize += width; dvmCompilerAppendMIR(curBB, insn); - cUnit.numInsts++; + /* assign seqNum to each insn in the trace */ + insn->seqNum = cUnit.numInsts++; int flags = dexGetFlagsFromOpcode(insn->dalvikInsn.opcode); diff --git a/vm/compiler/InlineTransformation.cpp b/vm/compiler/InlineTransformation.cpp index 650340c5e..24cf052ff 100644 --- a/vm/compiler/InlineTransformation.cpp +++ b/vm/compiler/InlineTransformation.cpp @@ -69,7 +69,7 @@ static bool inlineGetter(CompilationUnit *cUnit, return false; } - int dfFlags = dvmCompilerDataFlowAttributes[getterInsn.opcode]; + int dfFlags = dvmGetDexOptAttributes(&getterInsn); /* Expecting vA to be the destination register */ if (dfFlags & (DF_UA | DF_UA_WIDE)) { @@ -156,7 +156,7 @@ static bool inlineSetter(CompilationUnit *cUnit, if (!dvmCompilerCanIncludeThisInstruction(calleeMethod, &setterInsn)) return false; - int dfFlags = dvmCompilerDataFlowAttributes[setterInsn.opcode]; + int dfFlags = dvmGetDexOptAttributes(&setterInsn); if (dfFlags & (DF_UA | DF_UA_WIDE)) { setterInsn.vA = convertRegId(&invokeMIR->dalvikInsn, calleeMethod, @@ -222,12 +222,26 @@ static bool inlineSetter(CompilationUnit *cUnit, return true; } +/* skip inlining certain method */ +__attribute__((weak)) bool dvmSkipInlineThisMethod(CompilationUnit *cUnit, + const Method *calleeMethod, + MIR *invokeMIR, + BasicBlock *invokeBB, + bool isPredicted, + bool isRange) +{ + return false; +} + static bool tryInlineSingletonCallsite(CompilationUnit *cUnit, const Method *calleeMethod, MIR *invokeMIR, BasicBlock *invokeBB, bool isRange) { + if (dvmSkipInlineThisMethod(cUnit, calleeMethod, invokeMIR, invokeBB, false, isRange)) + return true; + /* Not a Java method */ if (dvmIsNativeMethod(calleeMethod)) return false; @@ -276,6 +290,9 @@ static bool tryInlineVirtualCallsite(CompilationUnit *cUnit, BasicBlock *invokeBB, bool isRange) { + if (dvmSkipInlineThisMethod(cUnit, calleeMethod, invokeMIR, invokeBB, true, isRange)) + return true; + /* Not a Java method */ if (dvmIsNativeMethod(calleeMethod)) return false; diff --git a/vm/compiler/IntermediateRep.cpp b/vm/compiler/IntermediateRep.cpp index db68c3c85..94e102ee9 100644 --- a/vm/compiler/IntermediateRep.cpp +++ b/vm/compiler/IntermediateRep.cpp @@ -25,6 +25,7 @@ BasicBlock *dvmCompilerNewBB(BBType blockType, int blockId) bb->id = blockId; bb->predecessors = dvmCompilerAllocBitVector(blockId > 32 ? 
blockId : 32, true /* expandable */); + bb->blockLabelLIR=NULL; return bb; } @@ -117,6 +118,7 @@ void dvmCompilerInsertLIRBefore(LIR *currentLIR, LIR *newLIR) */ void dvmCompilerInsertLIRAfter(LIR *currentLIR, LIR *newLIR) { + assert(currentLIR->next != NULL); newLIR->prev = currentLIR; newLIR->next = currentLIR->next; currentLIR->next = newLIR; diff --git a/vm/compiler/Loop.cpp b/vm/compiler/Loop.cpp index dc04a1135..c830432ad 100644 --- a/vm/compiler/Loop.cpp +++ b/vm/compiler/Loop.cpp @@ -339,6 +339,15 @@ static void updateRangeCheckInfo(CompilationUnit *cUnit, int arrayReg, } } +__attribute__((weak)) void dvmCompilerDumpMIRInCodeMotion(CompilationUnit *cUnit, MIR *mir) +{ + if(cUnit->printMe){ + DecodedInstruction *decInsn = &(mir->dalvikInsn); + char *decodedString = dvmCompilerGetDalvikDisassembly(decInsn, NULL); + ALOGD("%#06x %s", decInsn->opcode, decodedString); + } +} + /* Returns true if the loop body cannot throw any exceptions */ static bool doLoopBodyCodeMotion(CompilationUnit *cUnit) { @@ -349,7 +358,7 @@ static bool doLoopBodyCodeMotion(CompilationUnit *cUnit) for (mir = loopBody->firstMIRInsn; mir; mir = mir->next) { DecodedInstruction *dInsn = &mir->dalvikInsn; int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); /* Skip extended MIR instructions */ if ((u2) dInsn->opcode >= kNumPackedOpcodes) continue; @@ -420,6 +429,7 @@ static bool doLoopBodyCodeMotion(CompilationUnit *cUnit) updateRangeCheckInfo(cUnit, mir->ssaRep->uses[refIdx], mir->ssaRep->uses[useIdx]); } + dvmCompilerDumpMIRInCodeMotion(cUnit, mir); } } @@ -508,6 +518,7 @@ static void genHoistedChecks(CompilationUnit *cUnit) dvmCompilerAbort(cUnit); } } + cUnit->hasHoistedChecks=true; } } @@ -667,6 +678,62 @@ bool dvmCompilerFilterLoopBlocks(CompilationUnit *cUnit) return true; } +__attribute__((weak)) void dvmCompilerDumpIVList(CompilationUnit *cUnit) +{ + unsigned int i; + GrowableList *ivList = cUnit->loopAnalysis->ivList; + + if(cUnit->printMe){ + for (i = 0; i < ivList->numUsed; i++) { + InductionVariableInfo *ivInfo = + (InductionVariableInfo *) ivList->elemList[i]; + int iv = dvmConvertSSARegToDalvik(cUnit, ivInfo->ssaReg); + /* Basic IV */ + if (ivInfo->ssaReg == ivInfo->basicSSAReg) { + ALOGD("BIV %d: s%d(v%d_%d) + %d", i, + ivInfo->ssaReg, + DECODE_REG(iv), DECODE_SUB(iv), + ivInfo->inc); + /* Dependent IV */ + } else { + int biv = dvmConvertSSARegToDalvik(cUnit, ivInfo->basicSSAReg); + + ALOGD("DIV %d: s%d(v%d_%d) = %d * s%d(v%d_%d) + %d", i, + ivInfo->ssaReg, + DECODE_REG(iv), DECODE_SUB(iv), + ivInfo->m, + ivInfo->basicSSAReg, + DECODE_REG(biv), DECODE_SUB(biv), + ivInfo->c); + } + } + } +} + +__attribute__((weak)) void dvmCompilerDumpHoistedChecks(CompilationUnit *cUnit) +{ + LoopAnalysis *loopAnalysis = cUnit->loopAnalysis; + unsigned int i; + if(cUnit->printMe){ + for (i = 0; i < loopAnalysis->arrayAccessInfo->numUsed; i++) { + ArrayAccessInfo *arrayAccessInfo = + GET_ELEM_N(loopAnalysis->arrayAccessInfo, + ArrayAccessInfo*, i); + int arrayReg = DECODE_REG( + dvmConvertSSARegToDalvik(cUnit, arrayAccessInfo->arrayReg)); + int idxReg = DECODE_REG( + dvmConvertSSARegToDalvik(cUnit, arrayAccessInfo->ivReg)); + ALOGD("Array access %d", i); + ALOGD(" arrayReg %d", arrayReg); + ALOGD(" idxReg %d", idxReg); + ALOGD(" endReg %d", loopAnalysis->endConditionReg); + ALOGD(" maxC %d", arrayAccessInfo->maxC); + ALOGD(" minC %d", arrayAccessInfo->minC); + ALOGD(" opcode %d", loopAnalysis->loopBranchOpcode); + } + } +} + /* * Main entry point to do 
loop optimization. * Return false if sanity checks for loop formation/optimization failed. @@ -699,6 +766,8 @@ bool dvmCompilerLoopOpt(CompilationUnit *cUnit) false /* isIterative */); DEBUG_LOOP(dumpIVList(cUnit);) + dvmCompilerDumpIVList(cUnit); + /* Only optimize array accesses for simple counted loop for now */ if (!isSimpleCountedLoop(cUnit)) return false; @@ -714,6 +783,7 @@ bool dvmCompilerLoopOpt(CompilationUnit *cUnit) * header. */ genHoistedChecks(cUnit); + dvmCompilerDumpHoistedChecks(cUnit); return true; } diff --git a/vm/compiler/SSATransformation.cpp b/vm/compiler/SSATransformation.cpp index 7dde59411..dc4e25c38 100644 --- a/vm/compiler/SSATransformation.cpp +++ b/vm/compiler/SSATransformation.cpp @@ -153,7 +153,7 @@ static void checkForDominanceFrontier(BasicBlock *domBB, if (succBB->iDom != domBB && succBB->blockType == kDalvikByteCode && succBB->hidden == false) { - dvmSetBit(domBB->domFrontier, succBB->id); + dvmCompilerSetBit(domBB->domFrontier, succBB->id); } } @@ -253,7 +253,7 @@ static bool computeBlockDominators(CompilationUnit *cUnit, BasicBlock *bb) /* tempBlockV = tempBlockV ^ dominators */ dvmIntersectBitVectors(tempBlockV, tempBlockV, predBB->dominators); } - dvmSetBit(tempBlockV, bb->id); + dvmCompilerSetBit(tempBlockV, bb->id); if (dvmCompareBitVectors(tempBlockV, bb->dominators)) { dvmCopyBitVector(bb->dominators, tempBlockV); return true; @@ -317,7 +317,7 @@ static void computeDominators(CompilationUnit *cUnit) /* Set the dominator for the root node */ dvmClearAllBits(cUnit->entryBlock->dominators); - dvmSetBit(cUnit->entryBlock->dominators, cUnit->entryBlock->id); + dvmCompilerSetBit(cUnit->entryBlock->dominators, cUnit->entryBlock->id); if (cUnit->tempBlockV == NULL) { cUnit->tempBlockV = dvmCompilerAllocBitVector(numTotalBlocks, @@ -526,7 +526,7 @@ static bool insertPhiNodeOperands(CompilationUnit *cUnit, BasicBlock *bb) int encodedSSAValue = predBB->dataFlowInfo->dalvikToSSAMap[dalvikReg]; int ssaReg = DECODE_REG(encodedSSAValue); - dvmSetBit(ssaRegV, ssaReg); + dvmCompilerSetBit(ssaRegV, ssaReg); } /* Count the number of SSA registers for a Dalvik register */ @@ -589,6 +589,13 @@ void dvmCompilerMethodSSATransformation(CompilationUnit *cUnit) false /* isIterative */); } +/* brief report of DFS order of trace blocks */ +__attribute__((weak)) void dumpDFSOrder(CompilationUnit *cUnit) +{ + ALOGV("DFS order complete"); + return; +} + /* Build a loop. Return true if a loop structure is successfully identified. 
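The Loop.cpp additions above all follow one convention: each new diagnostic dump (dvmCompilerDumpMIRInCodeMotion, dvmCompilerDumpIVList, dvmCompilerDumpHoistedChecks) is a weak function that bails immediately unless `cUnit->printMe` is set, keeping release traces quiet while letting an instrumented build replace the whole routine. Reduced to its skeleton, with a simplified stand-in struct:

```cpp
#include <cstdio>

struct ToyCompilationUnit { bool printMe; };  // simplified stand-in

// Weak + flag-gated: essentially free in release runs, and replaceable
// wholesale by a diagnostics build via a strong definition.
__attribute__((weak)) void dumpLoopStats(const ToyCompilationUnit* cUnit) {
    if (!cUnit->printMe)
        return;  // fast exit on the common path
    std::printf("IV / hoisted-check diagnostics would print here\n");
}

int main() {
    ToyCompilationUnit cu = { true };
    dumpLoopStats(&cu);
    return 0;
}
```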
*/ bool dvmCompilerBuildLoop(CompilationUnit *cUnit) { @@ -605,6 +612,8 @@ bool dvmCompilerBuildLoop(CompilationUnit *cUnit) /* Re-compute the DFS order just for the loop */ computeDFSOrder(cUnit); + dumpDFSOrder(cUnit); + /* Re-compute the dominator info just for the loop */ computeDominators(cUnit); diff --git a/vm/compiler/codegen/CodegenFactory.cpp b/vm/compiler/codegen/CodegenFactory.cpp index f42ae746b..2bf27f5ab 100644 --- a/vm/compiler/codegen/CodegenFactory.cpp +++ b/vm/compiler/codegen/CodegenFactory.cpp @@ -133,9 +133,18 @@ static RegLocation loadValue(CompilationUnit *cUnit, RegLocation rlSrc, return rlSrc; } +__attribute__((weak)) bool storeValueThumb2(CompilationUnit *cUnit, RegLocation rlDest, + RegLocation rlSrc) +{ + return false; +} + static void storeValue(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc) { + if(storeValueThumb2(cUnit, rlDest, rlSrc)) + return; + LIR *defStart; LIR *defEnd; assert(!rlDest.wide); @@ -204,9 +213,18 @@ static RegLocation loadValueWide(CompilationUnit *cUnit, RegLocation rlSrc, return rlSrc; } +__attribute__((weak)) bool storeValueWideThumb2(CompilationUnit *cUnit, RegLocation rlDest, + RegLocation rlSrc) +{ + return false; +} + static void storeValueWide(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc) { + if(storeValueWideThumb2(cUnit, rlDest, rlSrc)) + return; + LIR *defStart; LIR *defEnd; assert(FPREG(rlSrc.lowReg)==FPREG(rlSrc.highReg)); diff --git a/vm/compiler/codegen/Optimizer.h b/vm/compiler/codegen/Optimizer.h index 36f33e226..914064d6b 100644 --- a/vm/compiler/codegen/Optimizer.h +++ b/vm/compiler/codegen/Optimizer.h @@ -30,7 +30,9 @@ enum optControlVector { kSuppressLoads, kMethodInlining, kMethodJit, +#ifndef WITH_QC_PERF kShiftArithmetic, +#endif }; /* Forward declarations */ diff --git a/vm/compiler/codegen/PostOptimizer.h b/vm/compiler/codegen/PostOptimizer.h new file mode 100644 index 000000000..006102ac3 --- /dev/null +++ b/vm/compiler/codegen/PostOptimizer.h @@ -0,0 +1,267 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. 
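The CodegenFactory.cpp hunk above (`storeValueThumb2`/`storeValueWideThumb2`) uses a third variant of the weak-hook idiom: the hook returns a bool meaning "I handled it", and the generic store path runs only while the weak default, which returns false, is still in place. A minimal sketch with invented names:

```cpp
#include <cstdio>

// Weak default signals "not handled", so the generic path below runs.
__attribute__((weak)) bool storeValueFast(int reg) {
    (void)reg;
    return false;
}

void storeValue(int reg) {
    if (storeValueFast(reg))
        return;  // a strong override claimed the store and did its own codegen
    std::printf("generic store of v%d\n", reg);  // fallback path
}

int main() { storeValue(3); return 0; }
```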
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DALVIK_VM_COMPILER_POSTOPTIMIZATION_H_ +#define DALVIK_VM_COMPILER_POSTOPTIMIZATION_H_ + +#include "Dalvik.h" +#include "libdex/DexOpcodes.h" +#include "compiler/codegen/arm/ArmLIR.h" + +//#include "compiler/codegen/Ralloc.h" + +//#include "compiler/codegen/arm/ArmLIR.h" +/* +#include "InlineNative.h" +#include "vm/Globals.h" +#include "vm/compiler/Loop.h" +#include "vm/compiler/Compiler.h" +#include "vm/compiler/CompilerInternals.h" +#include "vm/compiler/codegen/arm/ArmLIR.h" +#include "libdex/OpCodeNames.h" +#include "vm/compiler/codegen/arm/CalloutHelper.h" +#include "vm/compiler/codegen/arm/Ralloc.h" +*/ + + +/* Forward declarations */ +struct CompilationUnit; +struct LIR; +struct MIR; +//struct RegLocation; +struct ArmLIR; + +/* +enum OpCode; +enum RegisterClass; +////enum ArmOpCode; +enum ArmOpcode; +enum ArmConditionCode; +enum TemplateOpCode; +*/ + +void dvmCompilerApplyLocalOptimizations(struct CompilationUnit *cUnit, + struct LIR *head, + struct LIR *tail); + +void dvmCompilerApplyGlobalOptimizations(struct CompilationUnit *cUnit); + +bool dvmArithLocalOptimization(struct CompilationUnit *cUnit, + struct MIR *mir, + struct RegLocation rlDest, + struct RegLocation rlSrc1, + struct RegLocation rlSrc2); + +typedef struct LocalOptsFuncMap{ + + bool (*handleEasyDivide) (struct CompilationUnit *cUnit, + enum Opcode dalvikOpCode, + struct RegLocation rlSrc, + struct RegLocation rlDest, + int lit); + bool (*handleEasyMultiply) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + struct RegLocation rlDest, + int lit); + bool (*handleExecuteInline) (struct CompilationUnit *cUnit, + struct MIR *mir); + void (*handleExtendedMIR) (struct CompilationUnit *cUnit, + struct MIR *mir); + void (*insertChainingSwitch) (struct CompilationUnit *cUnit); + bool (*isPopCountLE2) (unsigned int x); + bool (*isPowerOfTwo) (int x); + int (*lowestSetBit) (unsigned int x); + void (*markCard) (struct CompilationUnit *cUnit, + int valReg, + int tgtAddrReg); + void (*setupLoopEntryBlock) (struct CompilationUnit *cUnit, + struct BasicBlock *entry, + struct ArmLIR *bodyLabel); + void (*genInterpSingleStep) (struct CompilationUnit *cUnit, + struct MIR *mir); + void (*setMemRefType) (struct ArmLIR *lir, + bool isLoad, + int memType); + void (*annotateDalvikRegAccess) (struct ArmLIR *lir, + int regId, + bool isLoad); + void (*setupResourceMasks) (struct ArmLIR *lir); + struct ArmLIR *(*newLIR0) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode); + struct ArmLIR *(*newLIR1) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode, + int dest); + struct ArmLIR *(*newLIR2) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode, + int dest, + int src1); + struct ArmLIR *(*newLIR3) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode, + int dest, + int src1, + int src2); +#if defined(_ARMV7_A) || defined(_ARMV7_A_NEON) + struct ArmLIR *(*newLIR4) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode, + int dest, + int src1, + int src2, + int info); 
+#endif + struct RegLocation (*inlinedTarget) (struct CompilationUnit *cUnit, + struct MIR *mir, + bool fpHint); + struct ArmLIR *(*genCheckCommon) (struct CompilationUnit *cUnit, + int dOffset, + struct ArmLIR *branch, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*loadWordDisp) (struct CompilationUnit *cUnit, + int rBase, + int displacement, + int rDest); + struct ArmLIR *(*storeWordDisp) (struct CompilationUnit *cUnit, + int rBase, + int displacement, + int rSrc); + void (*loadValueDirect) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + int reg1); + void (*loadValueDirectFixed) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + int reg1); + void (*loadValueDirectWide) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + int regLo, + int regHi); + void (*loadValueDirectWideFixed) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + int regLo, + int regHi); + struct RegLocation (*loadValue) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + enum RegisterClass opKind); + void (*storeValue) (struct CompilationUnit *cUnit, + struct RegLocation rlDest, + struct RegLocation rlSrc); + struct RegLocation (*loadValueWide) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + enum RegisterClass opKind); + struct ArmLIR *(*genNullCheck) (struct CompilationUnit *cUnit, + int sReg, + int mReg, + int dOffset, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*genRegRegCheck) (struct CompilationUnit *cUnit, + enum ArmConditionCode cond, + int reg1, + int reg2, + int dOffset, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*genZeroCheck) (struct CompilationUnit *cUnit, + int mReg, + int dOffset, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*genBoundsCheck) (struct CompilationUnit *cUnit, + int rIndex, + int rBound, + int dOffset, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*loadConstantNoClobber) (struct CompilationUnit *cUnit, + int rDest, + int value); + struct ArmLIR *(*loadConstant) (struct CompilationUnit *cUnit, + int rDest, + int value); + void (*storeValueWide) (struct CompilationUnit *cUnit, + struct RegLocation rlDest, + struct RegLocation rlSrc); + void (*genSuspendPoll) (struct CompilationUnit *cUnit, struct MIR *mir); + struct ArmLIR *(*storeBaseDispWide)(struct CompilationUnit *cUnit, + int rBase, + int displacement, + int rSrcLo, + int rSrcHi); + struct ArmLIR *(*storeBaseDisp)(struct CompilationUnit *cUnit, + int rBase, + int displacement, + int rSrc, + OpSize size); + struct ArmLIR *(*loadBaseDispWide)(struct CompilationUnit *cUnit, + MIR *mir, + int rBase, + int displacement, + int rDestLo, + int rDestHi, + int sReg); + struct ArmLIR *(*opRegRegImm)(struct CompilationUnit *cUnit, + enum OpKind op, + int rDest, + int rSrc1, + int value); + struct ArmLIR *(*opRegRegReg)(struct CompilationUnit *cUnit, + enum OpKind op, + int rDest, + int rSrc1, + int rSrc2); + struct ArmLIR *(*loadBaseIndexed)(struct CompilationUnit *cUnit, + int rBase, + int rIndex, + int rDest, + int scale, + enum OpSize size); + struct ArmLIR *(*storeBaseIndexed)(struct CompilationUnit *cUnit, + int rBase, + int rIndex, + int rSrc, + int scale, + enum OpSize size); + enum RegisterClass (*dvmCompilerRegClassBySize)(enum OpSize size); + int (*encodeShift)(int code, int amount); + struct ArmLIR *(*opRegReg)(struct CompilationUnit *cUnit, + enum OpKind op, + int rDestSrc1, + int rSrc2); + struct ArmLIR *(*opCondBranch)(struct CompilationUnit *cUnit, + enum ArmConditionCode cc); + struct ArmLIR *(*genIT)(struct CompilationUnit *cUnit, + enum ArmConditionCode code, + 
const char *guide); + void (*genBarrier)(struct CompilationUnit *cUnit); + int (*modifiedImmediate)(u4 value); + struct ArmLIR *(*genRegImmCheck)(struct CompilationUnit *cUnit, + enum ArmConditionCode cond, + int reg, + int checkValue, + int dOffset, + ArmLIR *pcrLabel); +} LocalOptsFuncMap; + +extern LocalOptsFuncMap localOptsFunMap; + +#endif // DALVIK_VM_COMPILER_POSTOPTIMIZATION_H_ + diff --git a/vm/compiler/codegen/arm/ArchFactory.cpp b/vm/compiler/codegen/arm/ArchFactory.cpp index 2daa7bcba..6be40b2d2 100644 --- a/vm/compiler/codegen/arm/ArchFactory.cpp +++ b/vm/compiler/codegen/arm/ArchFactory.cpp @@ -61,7 +61,7 @@ static TGT_LIR *genNullCheck(CompilationUnit *cUnit, int sReg, int mReg, if (dvmIsBitSet(cUnit->regPool->nullCheckedRegs, sReg)) { return pcrLabel; } - dvmSetBit(cUnit->regPool->nullCheckedRegs, sReg); + dvmCompilerSetBit(cUnit->regPool->nullCheckedRegs, sReg); return genRegImmCheck(cUnit, kArmCondEq, mReg, 0, dOffset, pcrLabel); } diff --git a/vm/compiler/codegen/arm/ArchUtility.cpp b/vm/compiler/codegen/arm/ArchUtility.cpp index 9f87b7ff4..db1281ff1 100644 --- a/vm/compiler/codegen/arm/ArchUtility.cpp +++ b/vm/compiler/codegen/arm/ArchUtility.cpp @@ -144,11 +144,14 @@ static void buildInsnString(const char *fmt, ArmLIR *lir, char* buf, operand = expandImmediate(operand); sprintf(tbuf,"%d [%#x]", operand, operand); break; + case 'q': + sprintf(tbuf,"q%d",(operand - 128 - FP_REG_OFFSET) >> 2); + break; case 's': sprintf(tbuf,"s%d",operand & FP_REG_MASK); break; case 'S': - sprintf(tbuf,"d%d",(operand & FP_REG_MASK) >> 1); + sprintf(tbuf,"d%d",(operand - FP_DOUBLE - FP_REG_OFFSET) >> 1); break; case 'h': sprintf(tbuf,"%04x", operand); @@ -189,6 +192,15 @@ static void buildInsnString(const char *fmt, ArmLIR *lir, char* buf, case kArmCondMi: strcpy(tbuf, "mi"); break; + case kArmCondPl: + strcpy(tbuf, "pl"); + break; + case kArmCondHi: + strcpy(tbuf, "hi"); + break; + case kArmCondLs: + strcpy(tbuf, "ls"); + break; default: strcpy(tbuf, ""); break; @@ -291,12 +303,26 @@ void dvmDumpResourceMask(LIR *lir, u8 mask, const char *prefix) #define DUMP_RESOURCE_MASK(X) #define DUMP_SSA_REP(X) +/* + * Decodes generic ARM opcodes + */ +static void printDefaultInstr(ArmLIR *lir, unsigned char *baseAddr) +{ + char buf[256]; + char opName[256]; + int offset = lir->generic.offset; + + buildInsnString(getEncoding(lir->opcode)->name, lir, opName, baseAddr, 256); + buildInsnString(getEncoding(lir->opcode)->fmt, lir, buf, baseAddr, 256); + ALOGD("%p (%04x): %-12s%s%s", + baseAddr + offset, offset, opName, buf, + lir->flags.isNop ? 
"(nop)" : ""); +} + /* Pretty-print a LIR instruction */ void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) { ArmLIR *lir = (ArmLIR *) arg; - char buf[256]; - char opName[256]; int offset = lir->generic.offset; int dest = lir->operands[0]; const bool dumpNop = false; @@ -358,6 +384,10 @@ void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) ALOGD("-------- reconstruct dalvik PC : 0x%04x @ +0x%04x", dest, lir->operands[1]); break; + case kArmPseudoPCReconstructionCellExtended: + ALOGD("-------- reconstruct dalvik PC : 0x%04x @ +0x%04x (extended)\n", dest, + lir->operands[1]); + break; case kArmPseudoPCReconstructionBlockLabel: /* Do nothing */ break; @@ -372,13 +402,7 @@ void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) if (lir->flags.isNop && !dumpNop) { break; } - buildInsnString(EncodingMap[lir->opcode].name, lir, opName, - baseAddr, 256); - buildInsnString(EncodingMap[lir->opcode].fmt, lir, buf, baseAddr, - 256); - ALOGD("%p (%04x): %-8s%s%s", - baseAddr + offset, offset, opName, buf, - lir->flags.isNop ? "(nop)" : ""); + printDefaultInstr(lir, baseAddr); break; } diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h index e159aecdb..49d5b2725 100644 --- a/vm/compiler/codegen/arm/ArmLIR.h +++ b/vm/compiler/codegen/arm/ArmLIR.h @@ -316,6 +316,7 @@ typedef enum ArmConditionCode { * Assemble.c. */ typedef enum ArmOpcode { + kArmPseudoPCReconstructionCellExtended = -19, /* pcReconstruction for extended MIR*/ kArmChainingCellBottom = -18, kArmPseudoBarrier = -17, kArmPseudoExtended = -16, @@ -536,7 +537,7 @@ typedef enum ArmOpcode { [0000] rm[3..0] */ kThumb2MulRRR, /* mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0] */ - kThumb2MnvRR, /* mvn [11101010011011110] rd[11-8] [0000] + kThumb2MvnRR, /* mvn [11101010011011110] rd[11-8] [0000] rm[3..0] */ kThumb2RsubRRI8, /* rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0] */ @@ -627,8 +628,10 @@ typedef enum ArmOpcode { kThumb2Dmb, /* dmb [1111001110111111100011110101] option[3-0] */ kThumb2LdrPcReln12, /* ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12] imm12[11-0] */ +#ifndef WITH_QC_PERF kThumb2RsbRRR, /* rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0] */ +#endif kThumbUndefined, /* undefined [11011110xxxxxxxx] */ kArmLast, } ArmOpcode; @@ -670,6 +673,8 @@ typedef enum ArmOpFeatureFlags { kUsesCCodes, kMemLoad, kMemStore, + kSetsFPStatus, + kUsesFPStatus, } ArmOpFeatureFlags; #define IS_LOAD (1 << kMemLoad) @@ -697,6 +702,8 @@ typedef enum ArmOpFeatureFlags { #define IS_IT (1 << kIsIT) #define SETS_CCODES (1 << kSetsCCodes) #define USES_CCODES (1 << kUsesCCodes) +#define SETS_FPSTATUS (1 << kSetsFPStatus) +#define USES_FPSTATUS (1 << kUsesFPStatus) /* Common combo register usage patterns */ #define REG_USE01 (REG_USE0 | REG_USE1) @@ -747,6 +754,7 @@ typedef enum ArmTargetOptHints { } ArmTargetOptHints; extern ArmEncodingMap EncodingMap[kArmLast]; +extern ArmEncodingMap* getEncoding(ArmOpcode opcode); /* * Each instance of this struct holds a pseudo or real LIR instruction: @@ -776,6 +784,7 @@ typedef struct ArmLIR { int aliasInfo; // For Dalvik register & litpool disambiguation u8 useMask; // Resource mask for use u8 defMask; // Resource mask for def + u4* extraData; } ArmLIR; /* Init values when a predicted chain is initially assembled */ diff --git a/vm/compiler/codegen/arm/Assemble.cpp b/vm/compiler/codegen/arm/Assemble.cpp index 10572eb5f..5e64c0107 100644 --- a/vm/compiler/codegen/arm/Assemble.cpp +++ b/vm/compiler/codegen/arm/Assemble.cpp @@ -654,7 +654,7 @@ 
ArmEncodingMap EncodingMap[kArmLast] = { ENCODING_MAP(kThumb2AdcRRR, 0xeb500000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, - IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES | USES_CCODES, "adcs", "r!0d, r!1d, r!2d!3H", 2), ENCODING_MAP(kThumb2AndRRR, 0xea000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, @@ -677,7 +677,7 @@ ArmEncodingMap EncodingMap[kArmLast] = { kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "mul", "r!0d, r!1d, r!2d", 2), - ENCODING_MAP(kThumb2MnvRR, 0xea6f0000, + ENCODING_MAP(kThumb2MvnRR, 0xea6f0000, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, "mvn", "r!0d, r!1d, shift !2d", 2), @@ -685,12 +685,12 @@ ArmEncodingMap EncodingMap[kArmLast] = { kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "rsb", "r!0d,r!1d,#!2m", 2), - ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsub */ + "rsbs", "r!0d,r!1d,#!2m", 2), + ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsbs */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "neg", "r!0d,r!1d", 2), + "negs", "r!0d,r!1d", 2), ENCODING_MAP(kThumb2OrrRRR, 0xea400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, @@ -774,15 +774,15 @@ ArmEncodingMap EncodingMap[kArmLast] = { "it:!1b", "!0c", 1), ENCODING_MAP(kThumb2Fmstat, 0xeef1fa10, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES, + kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES | USES_FPSTATUS, "fmstat", "", 2), ENCODING_MAP(kThumb2Vcmpd, 0xeeb40b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_FPSTATUS, "vcmp.f64", "!0S, !1S", 2), ENCODING_MAP(kThumb2Vcmps, 0xeeb40a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_FPSTATUS, "vcmp.f32", "!0s, !1s", 2), ENCODING_MAP(kThumb2LdrPcRel12, 0xf8df0000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, @@ -881,11 +881,13 @@ ArmEncodingMap EncodingMap[kArmLast] = { kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD, "ldr", "r!0d, [r15pc, -#!1d]", 2), +#ifndef WITH_QC_PERF ENCODING_MAP(kThumb2RsbRRR, 0xebd00000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, "rsb", "r!0d, r!1d, r!2d!3H", 2), +#endif ENCODING_MAP(kThumbUndefined, 0xde00, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND, @@ -929,6 +931,24 @@ static void installLiteralPools(CompilationUnit *cUnit) } /* + * Return the encoding map for the specified ARM opcode + */ +__attribute__((weak)) ArmEncodingMap* getEncoding(ArmOpcode opcode) { + if (opcode > kThumbUndefined) { + opcode = kThumbUndefined; + } + ArmEncodingMap* encoder = &EncodingMap[opcode]; + return encoder; +} + +/* process more ARM encodings */ +__attribute__((weak)) u4 processMoreEncodings(ArmEncodingMap* encoder, int pos, u4 operand) +{ + assert(0); + return 0; +} + +/* * Assemble the LIR into binary instruction format. 
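The EncodingMap fixes above add SETS_FPSTATUS/USES_FPSTATUS so that vcmp (which writes FPSCR) and fmstat (which reads it) carry an explicit def/use dependency and cannot be scheduled apart. The underlying mechanism is plain bitmask intersection over per-instruction resource masks; a toy version under assumed bit assignments (the real masks live in ArmLIR's defMask/useMask with ENCODE_FP_STATUS):

```cpp
#include <cstdint>

// One bit per machine resource; FP status gets its own bit so the
// scheduler sees vcmp -> fmstat as a true dependency.
enum : uint64_t { ENCODE_R0 = 1ull << 0, ENCODE_FP_STATUS = 1ull << 40 };

struct ToyLIR { uint64_t defMask; uint64_t useMask; };

// Two instructions must stay ordered if one defines a resource the
// other uses or defines.
bool mustStayOrdered(const ToyLIR& a, const ToyLIR& b) {
    return (a.defMask & (b.useMask | b.defMask)) != 0 ||
           (b.defMask & a.useMask) != 0;
}

int main() {
    ToyLIR vcmp   = { ENCODE_FP_STATUS, ENCODE_R0 };  // vcmp: writes FPSCR
    ToyLIR fmstat = { 0, ENCODE_FP_STATUS };          // fmstat: reads FPSCR
    return mustStayOrdered(vcmp, fmstat) ? 0 : 1;     // dependency detected
}
```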
Note that we may * discover that pc-relative displacements may not fit the selected * instruction. In those cases we will try to substitute a new code @@ -1073,7 +1093,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit, NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; } - ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; + ArmEncodingMap* encoder = getEncoding(lir->opcode); u4 bits = encoder->skeleton; int i; for (i = 0; i < 4; i++) { @@ -1128,7 +1148,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit, case kFmtDfp: { assert(DOUBLEREG(operand)); assert((operand & 0x1) == 0); - int regName = (operand & FP_REG_MASK) >> 1; + int regName = (operand - FP_DOUBLE - FP_REG_OFFSET) / 2; /* Snag the 1-bit slice and position it */ value = ((regName & 0x10) >> 4) << encoder->fieldLoc[i].end; @@ -1163,7 +1183,8 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit, bits |= value; break; default: - assert(0); + bits |= processMoreEncodings(encoder, i, operand); + break; } } if (encoder->size == 2) { @@ -1296,10 +1317,10 @@ static inline u4 getChainCellSize(const ChainCellCounts* pChainCellCounts) for (i = 0; i < kChainingCellGap; i++) { if (i != kChainingCellInvokePredicted) { cellSize += pChainCellCounts->u.count[i] * - (CHAIN_CELL_NORMAL_SIZE >> 2); + ((CHAIN_CELL_NORMAL_SIZE >> 2)+pChainCellCounts->extraSize); } else { cellSize += pChainCellCounts->u.count[i] * - (CHAIN_CELL_PREDICTED_SIZE >> 2); + ((CHAIN_CELL_PREDICTED_SIZE >> 2)+pChainCellCounts->extraSize); } } return cellSize; @@ -1378,7 +1399,9 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) armLIR = NEXT_LIR(armLIR)) { armLIR->generic.offset = offset; if (armLIR->opcode >= 0 && !armLIR->flags.isNop) { - armLIR->flags.size = EncodingMap[armLIR->opcode].size * 2; + armLIR->flags.size = getEncoding(armLIR->opcode)->size * 2; + if(armLIR->flags.size==0) + armLIR->flags.isNop=true; offset += armLIR->flags.size; } else if (armLIR->opcode == kArmPseudoPseudoAlign4) { if (offset & 0x2) { @@ -1521,6 +1544,9 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) /* Set the gap number in the chaining cell count structure */ chainCellCounts.u.count[kChainingCellGap] = chainingCellGap; + assert((cUnit->chainingCellExtraSize & 0x3) ==0); + chainCellCounts.extraSize = cUnit->chainingCellExtraSize >> 2; + memcpy((char*)cUnit->baseAddr + chainCellOffset, &chainCellCounts, sizeof(chainCellCounts)); @@ -1558,7 +1584,7 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) */ static u4 getSkeleton(ArmOpcode op) { - return EncodingMap[op].skeleton; + return getEncoding(op)->skeleton; } static u4 assembleChainingBranch(int branchOffset, bool thumbTarget) @@ -1916,6 +1942,7 @@ static u4* unchainSingle(JitEntry *trace) } for (j = 0; j < pChainCellCounts->u.count[i]; j++) { + pChainCells += pChainCellCounts->extraSize; switch(i) { case kChainingCellNormal: case kChainingCellHot: @@ -2200,13 +2227,14 @@ static void findClassPointersSingleTrace(char *base, void (*callback)(void *)) chainTypeIdx++) { if (chainTypeIdx != kChainingCellInvokePredicted) { /* In 32-bit words */ - pChainCells += (CHAIN_CELL_NORMAL_SIZE >> 2) * + pChainCells += ((CHAIN_CELL_NORMAL_SIZE >> 2)+pChainCellCounts->extraSize) * pChainCellCounts->u.count[chainTypeIdx]; continue; } for (chainIdx = 0; chainIdx < pChainCellCounts->u.count[chainTypeIdx]; chainIdx++) { + pChainCells += pChainCellCounts->extraSize; PredictedChainingCell *cell = 
(PredictedChainingCell *) pChainCells; /* diff --git a/vm/compiler/codegen/arm/Codegen.h b/vm/compiler/codegen/arm/Codegen.h index 7ec921770..873648cc9 100644 --- a/vm/compiler/codegen/arm/Codegen.h +++ b/vm/compiler/codegen/arm/Codegen.h @@ -25,6 +25,8 @@ #include "compiler/CompilerIR.h" #include "CalloutHelper.h" +#include "compiler/codegen/PostOptimizer.h" + #if defined(_CODEGEN_C) /* * loadConstant() sometimes needs to add a small imm to a pre-existing constant diff --git a/vm/compiler/codegen/arm/CodegenCommon.cpp b/vm/compiler/codegen/arm/CodegenCommon.cpp index 5c02678fe..52ae87aa4 100644 --- a/vm/compiler/codegen/arm/CodegenCommon.cpp +++ b/vm/compiler/codegen/arm/CodegenCommon.cpp @@ -36,7 +36,7 @@ static void setMemRefType(ArmLIR *lir, bool isLoad, int memType) { u8 *maskPtr; u8 mask = ENCODE_MEM;; - assert(EncodingMap[lir->opcode].flags & (IS_LOAD | IS_STORE)); + assert(getEncoding(lir->opcode)->flags & (IS_LOAD | IS_STORE)); if (isLoad) { maskPtr = &lir->useMask; } else { @@ -58,7 +58,7 @@ static void setMemRefType(ArmLIR *lir, bool isLoad, int memType) break; case kMustNotAlias: /* Currently only loads can be marked as kMustNotAlias */ - assert(!(EncodingMap[lir->opcode].flags & IS_STORE)); + assert(!(getEncoding(lir->opcode)->flags & IS_STORE)); *maskPtr |= ENCODE_MUST_NOT_ALIAS; break; default: @@ -94,7 +94,6 @@ static inline u8 getRegMaskCommon(int reg) u8 seed; int shift; int regId = reg & 0x1f; - /* * Each double register is equal to a pair of single-precision FP registers */ @@ -120,6 +119,16 @@ static inline void setupRegMask(u8 *mask, int reg) *mask |= getRegMaskCommon(reg); } +/* skip certain def masks */ +__attribute__((weak)) bool skipDefRegMasks(ArmLIR *lir) { + return false; +} + +/* skip certain use masks */ +__attribute__((weak)) bool skipUseRegMasks(ArmLIR *lir) { + return false; +} + /* * Set up the proper fields in the resource mask */ @@ -133,7 +142,7 @@ static void setupResourceMasks(ArmLIR *lir) return; } - flags = EncodingMap[lir->opcode].flags; + flags = getEncoding(lir->opcode)->flags; /* Set up the mask for resources that are updated */ if (flags & (IS_LOAD | IS_STORE)) { @@ -150,12 +159,14 @@ static void setupResourceMasks(ArmLIR *lir) return; } - if (flags & REG_DEF0) { - setupRegMask(&lir->defMask, lir->operands[0]); - } + if (!skipDefRegMasks(lir)) { + if (flags & REG_DEF0) { + setupRegMask(&lir->defMask, lir->operands[0]); + } - if (flags & REG_DEF1) { - setupRegMask(&lir->defMask, lir->operands[1]); + if (flags & REG_DEF1) { + setupRegMask(&lir->defMask, lir->operands[1]); + } } if (flags & REG_DEF_SP) { @@ -178,6 +189,10 @@ static void setupResourceMasks(ArmLIR *lir) lir->defMask |= ENCODE_CCODE; } + if (flags & SETS_FPSTATUS) { + lir->defMask |= ENCODE_FP_STATUS; + } + /* Conservatively treat the IT block */ if (flags & IS_IT) { lir->defMask = ENCODE_ALL; @@ -185,10 +200,11 @@ static void setupResourceMasks(ArmLIR *lir) if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) { int i; - - for (i = 0; i < 4; i++) { - if (flags & (1 << (kRegUse0 + i))) { - setupRegMask(&lir->useMask, lir->operands[i]); + if (!skipUseRegMasks(lir)) { + for (i = 0; i < 4; i++) { + if (flags & (1 << (kRegUse0 + i))) { + setupRegMask(&lir->useMask, lir->operands[i]); + } } } } @@ -213,6 +229,10 @@ static void setupResourceMasks(ArmLIR *lir) lir->useMask |= ENCODE_CCODE; } + if (flags & USES_FPSTATUS) { + lir->useMask |= ENCODE_FP_STATUS; + } + /* Fixup for kThumbPush/lr and kThumbPop/pc */ if (opcode == kThumbPush || opcode == kThumbPop) { u8 r8Mask = 
getRegMaskCommon(r8); @@ -231,7 +251,7 @@ static void setupResourceMasks(ArmLIR *lir) */ static void relaxBranchMasks(ArmLIR *lir) { - int flags = EncodingMap[lir->opcode].flags; + int flags = getEncoding(lir->opcode)->flags; /* Make sure only branch instructions are passed here */ assert(flags & IS_BRANCH); @@ -264,7 +284,7 @@ static void relaxBranchMasks(ArmLIR *lir) static ArmLIR *newLIR0(CompilationUnit *cUnit, ArmOpcode opcode) { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); - assert(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & NO_OPERAND)); + assert(isPseudoOpcode(opcode) || (getEncoding(opcode)->flags & NO_OPERAND)); insn->opcode = opcode; setupResourceMasks(insn); dvmCompilerAppendLIR(cUnit, (LIR *) insn); @@ -275,7 +295,7 @@ static ArmLIR *newLIR1(CompilationUnit *cUnit, ArmOpcode opcode, int dest) { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); - assert(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_UNARY_OP)); + assert(isPseudoOpcode(opcode) || (getEncoding(opcode)->flags & IS_UNARY_OP)); insn->opcode = opcode; insn->operands[0] = dest; setupResourceMasks(insn); @@ -288,7 +308,7 @@ static ArmLIR *newLIR2(CompilationUnit *cUnit, ArmOpcode opcode, { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); assert(isPseudoOpcode(opcode) || - (EncodingMap[opcode].flags & IS_BINARY_OP)); + (getEncoding(opcode)->flags & IS_BINARY_OP)); insn->opcode = opcode; insn->operands[0] = dest; insn->operands[1] = src1; @@ -301,11 +321,11 @@ static ArmLIR *newLIR3(CompilationUnit *cUnit, ArmOpcode opcode, int dest, int src1, int src2) { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); - if (!(EncodingMap[opcode].flags & IS_TERTIARY_OP)) { + if (!(getEncoding(opcode)->flags & IS_TERTIARY_OP)) { ALOGE("Bad LIR3: %s[%d]",EncodingMap[opcode].name,opcode); } assert(isPseudoOpcode(opcode) || - (EncodingMap[opcode].flags & IS_TERTIARY_OP)); + (getEncoding(opcode)->flags & IS_TERTIARY_OP)); insn->opcode = opcode; insn->operands[0] = dest; insn->operands[1] = src1; @@ -317,11 +337,11 @@ static ArmLIR *newLIR3(CompilationUnit *cUnit, ArmOpcode opcode, #if defined(_ARMV7_A) || defined(_ARMV7_A_NEON) static ArmLIR *newLIR4(CompilationUnit *cUnit, ArmOpcode opcode, - int dest, int src1, int src2, int info) + int dest, int src1, int src2, int info) { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); assert(isPseudoOpcode(opcode) || - (EncodingMap[opcode].flags & IS_QUAD_OP)); + (getEncoding(opcode)->flags & IS_QUAD_OP)); insn->opcode = opcode; insn->operands[0] = dest; insn->operands[1] = src1; diff --git a/vm/compiler/codegen/arm/CodegenDriver.cpp b/vm/compiler/codegen/arm/CodegenDriver.cpp index de53b00fb..541427592 100644 --- a/vm/compiler/codegen/arm/CodegenDriver.cpp +++ b/vm/compiler/codegen/arm/CodegenDriver.cpp @@ -297,9 +297,17 @@ static inline ArmLIR *genTrap(CompilationUnit *cUnit, int dOffset, return genCheckCommon(cUnit, dOffset, branch, pcrLabel); } +__attribute__((weak)) bool genIGetWideThumb2(CompilationUnit *cUnit, MIR *mir, int fieldOffset) +{ + return false; +} + /* Load a wide field from an object instance */ static void genIGetWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) { + if (genIGetWideThumb2(cUnit, mir, fieldOffset)) + return; + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); RegLocation rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); RegLocation rlResult; @@ -321,9 +329,17 @@ static void genIGetWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) storeValueWide(cUnit, 
rlDest, rlResult); } +__attribute__((weak)) bool genIPutWideThumb2(CompilationUnit *cUnit, MIR *mir, int fieldOffset) +{ + return false; +} + /* Store a wide field to an object instance */ static void genIPutWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) { + if (genIPutWideThumb2(cUnit, mir, fieldOffset)) + return; + RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 2); rlObj = loadValue(cUnit, rlObj, kCoreReg); @@ -398,6 +414,13 @@ static void genIPut(CompilationUnit *cUnit, MIR *mir, OpSize size, } } +#ifdef WITH_QC_PERF +__attribute__((weak)) bool genArrayGetThumb2(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlDest, int scale) +{ + return false; +} /* * Generate array load @@ -406,6 +429,10 @@ static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size, RegLocation rlArray, RegLocation rlIndex, RegLocation rlDest, int scale) { + if(genArrayGetThumb2(cUnit, mir, size, rlArray, rlIndex, + rlDest, scale)) + return; + RegisterClass regClass = dvmCompilerRegClassBySize(size); int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); @@ -467,6 +494,13 @@ static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size, } } +__attribute__((weak)) bool genArrayPutThumb2(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlSrc, int scale) +{ + return false; +} + /* * Generate array store * @@ -475,6 +509,10 @@ static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size, RegLocation rlArray, RegLocation rlIndex, RegLocation rlSrc, int scale) { + if(genArrayPutThumb2(cUnit, mir, size, rlArray, rlIndex, + rlSrc, scale)) + return; + RegisterClass regClass = dvmCompilerRegClassBySize(size); int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); @@ -540,6 +578,7 @@ static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size, HEAP_ACCESS_SHADOW(false); } } +#endif /* * Generate array object store @@ -673,6 +712,7 @@ static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, bool checkZero = false; void *callTgt; int retReg = r0; + bool setCCode = false; switch (mir->dalvikInsn.opcode) { case OP_NOT_LONG: @@ -687,11 +727,13 @@ static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, case OP_ADD_LONG_2ADDR: firstOp = kOpAdd; secondOp = kOpAdc; + setCCode = true; break; case OP_SUB_LONG: case OP_SUB_LONG_2ADDR: firstOp = kOpSub; secondOp = kOpSbc; + setCCode = true; break; case OP_MUL_LONG: case OP_MUL_LONG_2ADDR: @@ -733,8 +775,10 @@ static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); loadConstantNoClobber(cUnit, tReg, 0); + SET_CCODE; opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc2.lowReg); + UNSET_CCODE; opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg); genRegCopy(cUnit, rlResult.highReg, tReg); storeValueWide(cUnit, rlDest, rlResult); @@ -745,7 +789,7 @@ static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, dvmCompilerAbort(cUnit); } if (!callOut) { - genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2); + genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2, setCCode); } else { // Adjust return regs in to handle case of rem returning r2/r3 dvmCompilerFlushAllRegs(cUnit); /* Send everything to home location */ @@ 
-1078,7 +1122,9 @@ static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir, loadMultiple(cUnit, r4PC, regMask); /* No need to generate the loop structure if numArgs <= 11 */ if (numArgs > 11) { + SET_CCODE; opRegImm(cUnit, kOpSub, r5FP, 4); + UNSET_CCODE; genConditionalBranch(cUnit, kArmCondNe, loopLabel); } } @@ -1449,6 +1495,20 @@ static void genSuspendPoll(CompilationUnit *cUnit, MIR *mir) genRegImmCheck(cUnit, kArmCondNe, rTemp, 0, mir->offset, NULL); } +__attribute__((weak)) void dvmGenSuspendPoll(CompilationUnit *cUnit, + BasicBlock *bb, + MIR *mir, + bool genSuspendPollEnabled) +{ + /* backward branch? */ + bool backwardBranch = (bb->taken->startOffset <= mir->offset); + + if (backwardBranch && + (genSuspendPollEnabled || cUnit->jitMode == kJitLoop)) { + genSuspendPoll(cUnit, mir); + } +} + /* * The following are the first-level codegen routines that analyze the format * of each bytecode then either dispatch special purpose codegen routines @@ -1458,13 +1518,7 @@ static void genSuspendPoll(CompilationUnit *cUnit, MIR *mir) static bool handleFmt10t_Fmt20t_Fmt30t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, ArmLIR *labelList) { - /* backward branch? */ - bool backwardBranch = (bb->taken->startOffset <= mir->offset); - - if (backwardBranch && - (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) { - genSuspendPoll(cUnit, mir); - } + dvmGenSuspendPoll(cUnit, bb, mir, gDvmJit.genSuspendPoll); int numPredecessors = dvmCountSetBits(bb->taken->predecessors); /* @@ -1530,10 +1584,17 @@ static bool handleFmt11n_Fmt31i(CompilationUnit *cUnit, MIR *mir) case OP_CONST_WIDE_32: { //TUNING: single routine to load constant pair for support doubles //TUNING: load 0/-1 separately to avoid load dependency - rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB); - opRegRegImm(cUnit, kOpAsr, rlResult.highReg, - rlResult.lowReg, 31); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + if(FPREG(rlResult.lowReg)){ + /* if the constant is FP, use VFP register to hold it */ + loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, + mir->dalvikInsn.vB, + ((mir->dalvikInsn.vB)&0x80000000) == 0x80000000? -1:0); + }else{ + loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB); + opRegRegImm(cUnit, kOpAsr, rlResult.highReg, + rlResult.lowReg, 31); + } storeValueWide(cUnit, rlDest, rlResult); break; } @@ -2081,10 +2142,17 @@ static bool handleFmt21s(CompilationUnit *cUnit, MIR *mir) int BBBB = mir->dalvikInsn.vB; if (dalvikOpcode == OP_CONST_WIDE_16) { rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); - rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - loadConstantNoClobber(cUnit, rlResult.lowReg, BBBB); - //TUNING: do high separately to avoid load dependency - opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + if(FPREG(rlResult.lowReg)){ + /* if the constant is FP, use VFP register to hold it */ + loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, + mir->dalvikInsn.vB, + ((mir->dalvikInsn.vB)&0x80000000) == 0x80000000? 
-1:0); +}else{ + loadConstantNoClobber(cUnit, rlResult.lowReg, BBBB); + //TUNING: do high separately to avoid load dependency + opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31); + } storeValueWide(cUnit, rlDest, rlResult); } else if (dalvikOpcode == OP_CONST_16) { rlDest = dvmCompilerGetDest(cUnit, mir, 0); @@ -2102,13 +2170,7 @@ static bool handleFmt21t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, { Opcode dalvikOpcode = mir->dalvikInsn.opcode; ArmConditionCode cond; - /* backward branch? */ - bool backwardBranch = (bb->taken->startOffset <= mir->offset); - - if (backwardBranch && - (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) { - genSuspendPoll(cUnit, mir); - } + dvmGenSuspendPoll(cUnit, bb, mir, gDvmJit.genSuspendPoll); RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); rlSrc = loadValue(cUnit, rlSrc, kCoreReg); @@ -2257,7 +2319,14 @@ static bool handleEasyMultiply(CompilationUnit *cUnit, } else { // Reverse subtract: (src << (shift + 1)) - src. assert(powerOfTwoMinusOne); +#ifdef WITH_QC_PERF + // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1) + int tReg = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1)); + opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg); +#else genMultiplyByShiftAndReverseSubtract(cUnit, rlSrc, rlResult, lowestSetBit(lit + 1)); +#endif } storeValue(cUnit, rlDest, rlResult); return true; @@ -2598,13 +2667,7 @@ static bool handleFmt22t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, { Opcode dalvikOpcode = mir->dalvikInsn.opcode; ArmConditionCode cond; - /* backward branch? */ - bool backwardBranch = (bb->taken->startOffset <= mir->offset); - - if (backwardBranch && - (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) { - genSuspendPoll(cUnit, mir); - } + dvmGenSuspendPoll(cUnit, bb, mir, gDvmJit.genSuspendPoll); RegLocation rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 0); RegLocation rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 1); @@ -2669,6 +2732,17 @@ static bool handleFmt22x_Fmt32x(CompilationUnit *cUnit, MIR *mir) return false; } +/* + * Utility function to check the DEX opcode in the MIR + */ +__attribute__((weak)) bool isInvalidMIR(CompilationUnit *cUnit, MIR *mir) +{ + bool result = false; + + return result; +} + + static bool handleFmt23x(CompilationUnit *cUnit, MIR *mir) { Opcode opcode = mir->dalvikInsn.opcode; @@ -3484,7 +3558,9 @@ static bool genInlinedStringIsEmptyOrLength(CompilationUnit *cUnit, MIR *mir, if (isEmpty) { // dst = (dst == 0); int tReg = dvmCompilerAllocTemp(cUnit); + SET_CCODE; opRegReg(cUnit, kOpNeg, tReg, rlResult.lowReg); + UNSET_CCODE; opRegRegReg(cUnit, kOpAdc, rlResult.lowReg, rlResult.lowReg, tReg); } storeValue(cUnit, rlDest, rlResult); @@ -3561,7 +3637,9 @@ static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir) * mechanism for now. */ opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.highReg, 31); + SET_CCODE; opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); + UNSET_CCODE; opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc.highReg, signReg); opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); opRegReg(cUnit, kOpXor, rlResult.highReg, signReg); @@ -3587,6 +3665,12 @@ static bool genInlinedLongDoubleConversion(CompilationUnit *cUnit, MIR *mir) return false; } +__attribute__((weak)) int getInlineTableFunc(int operation) +{ + const InlineOperation* inLineTable = dvmGetInlineOpsTable(); + return ((int)inLineTable[operation].func); +} + /* * JITs a call to a C function.
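A convention runs through the weak hooks in this file: returning false means "not handled here, fall through to the portable code path" (genIGetWideThumb2, genArrayGetThumb2 and friends), while isInvalidMIR() returning true tells the dispatch loop the MIR was dealt with, so a strong version is expected to emit its own fallback before saying so. An illustrative strong override, with opcodes chosen for the example only:

    /* Hypothetical override: refuse to JIT selected long-division opcodes,
     * single-stepping them through the interpreter instead. The dispatch
     * loop marks the MIR handled when this returns true. */
    bool isInvalidMIR(CompilationUnit *cUnit, MIR *mir)
    {
        switch (mir->dalvikInsn.opcode) {
        case OP_DIV_LONG:
        case OP_DIV_LONG_2ADDR:
            genInterpSingleStep(cUnit, mir);  /* emit fallback code first */
            return true;
        default:
            return false;
        }
    }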
* TODO: use this for faster native method invocation for simple native @@ -3597,8 +3681,7 @@ static bool handleExecuteInlineC(CompilationUnit *cUnit, MIR *mir) DecodedInstruction *dInsn = &mir->dalvikInsn; int operation = dInsn->vB; unsigned int i; - const InlineOperation* inLineTable = dvmGetInlineOpsTable(); - uintptr_t fn = (int) inLineTable[operation].func; + uintptr_t fn = getInlineTableFunc(operation); if (fn == 0) { dvmCompilerAbort(cUnit); } @@ -3608,14 +3691,55 @@ static bool handleExecuteInlineC(CompilationUnit *cUnit, MIR *mir) dvmCompilerClobber(cUnit, r7); int offset = offsetof(Thread, interpSave.retval); opRegRegImm(cUnit, kOpAdd, r4PC, r6SELF, offset); +#ifdef INLINE_ARG_EXPANDED + switch( dInsn->vA ){ + case 7: + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, 6), r7); + opImm(cUnit, kOpPush, (1<<r7)); + /* fall through */ + case 6: + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, 5), r7); + opImm(cUnit, kOpPush, (1<<r7)); + /* fall through */ + case 5: + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, 4), r7); + } + opImm(cUnit, kOpPush, (1<<r4PC) | (1<<r7)); + LOAD_FUNC_ADDR(cUnit, r4PC, fn); + genExportPC(cUnit, mir); +#else opImm(cUnit, kOpPush, (1<<r4PC) | (1<<r7)); LOAD_FUNC_ADDR(cUnit, r4PC, fn); genExportPC(cUnit, mir); +#endif + +#ifdef INLINE_ARG_EXPANDED + if( dInsn->vA >= 5 ){ + for (i=0; i < 4; i++) { + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, i), i); + } + } else { + for (i=0; i < dInsn->vA; i++) { + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, i), i); + } + } +#else for (i=0; i < dInsn->vA; i++) { loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, i), i); } +#endif opReg(cUnit, kOpBlx, r4PC); +#ifdef INLINE_ARG_EXPANDED + if( dInsn->vA == 7 ){ + opRegImm(cUnit, kOpAdd, r13sp, 16); + } else if( dInsn->vA == 6 ){ + opRegImm(cUnit, kOpAdd, r13sp, 12); + } else { + opRegImm(cUnit, kOpAdd, r13sp, 8); + } +#else opRegImm(cUnit, kOpAdd, r13sp, 8); +#endif /* NULL? */ ArmLIR *branchOver = genCmpImmBranch(cUnit, kArmCondNe, r0, 0); loadConstant(cUnit, r0, (int) (cUnit->method->insns + mir->offset)); @@ -3692,6 +3816,8 @@ static bool handleExecuteInline(CompilationUnit *cUnit, MIR *mir) case INLINE_DOUBLE_TO_LONG_BITS: return handleExecuteInlineC(cUnit, mir); } + return handleExecuteInlineC(cUnit, mir); + dvmCompilerAbort(cUnit); return false; // Not reachable; keeps compiler happy. 
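The INLINE_ARG_EXPANDED path above keeps the original calling convention for the first four arguments (r0-r3) and spills the rest: argument 4 rides in r7 inside the existing (1<<r4PC)|(1<<r7) pair push, while arguments 5 and 6 each get a push of their own, which is why the post-call adjustment of r13sp grows from 8 to 12 or 16 bytes. The invariant as a small C sketch (function name hypothetical):

    /* Bytes to pop after the inline call, given vA inlined arguments.
     * 8 covers the r4PC/r7 pair push; each argument past the fifth adds
     * one extra 4-byte push. Matches the kOpAdd on r13sp above. */
    static int inlineSpBytes(unsigned vA)
    {
        int extra = (vA > 5) ? (int)(vA - 5) : 0;  /* args 5 and 6 */
        return 8 + 4 * extra;   /* vA==6 -> 12, vA==7 -> 16, else 8 */
    }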
} @@ -3700,10 +3826,9 @@ static bool handleFmt51l(CompilationUnit *cUnit, MIR *mir) { //TUNING: We're using core regs here - not optimal when target is a double RegLocation rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); - RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - loadConstantNoClobber(cUnit, rlResult.lowReg, - mir->dalvikInsn.vB_wide & 0xFFFFFFFFUL); - loadConstantNoClobber(cUnit, rlResult.highReg, + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, + mir->dalvikInsn.vB_wide & 0xFFFFFFFFUL, (mir->dalvikInsn.vB_wide>>32) & 0xFFFFFFFFUL); storeValueWide(cUnit, rlDest, rlResult); return false; @@ -3825,6 +3950,31 @@ static void handleInvokePredictedChainingCell(CompilationUnit *cUnit) addWordData(cUnit, NULL, PREDICTED_CHAIN_COUNTER_INIT); } +static void handlePCReconstructionExtended(CompilationUnit *cUnit) +{ + ArmLIR **pcrLabel = + (ArmLIR **) cUnit->pcReconstructionListExtended.elemList; + int numElems = cUnit->pcReconstructionListExtended.numUsed; + int i; + ArmLIR *exceptionBlock; + if(numElems>0){ + exceptionBlock = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true); + exceptionBlock->opcode = kArmPseudoEHBlockLabel; + for (i = 0; i < numElems; i++) { + dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]); + /* r0 = dalvik PC */ + loadConstant(cUnit, r0, pcrLabel[i]->operands[0]); + genUnconditionalBranch(cUnit, exceptionBlock); + } + /* append exception block after pcReconstruction blocks */ + dvmCompilerAppendLIR(cUnit, (LIR *) exceptionBlock); + loadWordDisp(cUnit, r6SELF, offsetof(Thread, + jitToInterpEntries.dvmJitToInterpPunt), + r1); + opReg(cUnit, kOpBlx, r1); + } +} + /* Load the Dalvik PC into r0 and jump to the specified target */ static void handlePCReconstruction(CompilationUnit *cUnit, ArmLIR *targetLabel) @@ -3842,6 +3992,9 @@ static void handlePCReconstruction(CompilationUnit *cUnit, newLIR0(cUnit, kThumbUndefined); } + /* handle pcReconstruction for extended MIRs */ + handlePCReconstructionExtended(cUnit); + for (i = 0; i < numElems; i++) { dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]); /* r0 = dalvik PC */ @@ -3859,6 +4012,13 @@ static const char *extendedMIROpNames[kMirOpLast - kMirOpFirst] = { "kMirOpCheckInlinePrediction", }; + +__attribute__((weak)) bool genHoistedChecksForCountUpLoopThumb(CompilationUnit *cUnit, + MIR *mir) +{ + return false; +} + /* * vA = arrayReg; * vB = idxReg; @@ -3869,6 +4029,8 @@ static const char *extendedMIROpNames[kMirOpLast - kMirOpFirst] = { */ static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) { + if(genHoistedChecksForCountUpLoopThumb(cUnit, mir)) + return; /* * NOTE: these synthesized blocks don't have ssa names assigned * for Dalvik registers.
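handlePCReconstructionExtended() deduplicates the exception exit: every extended reconstruction cell only materializes its Dalvik PC in r0 and branches to one shared EH block, and that block alone loads dvmJitToInterpPunt from the thread's jit-to-interp table and calls it. The layout it appends, sketched as a comment:

    /* Appended layout (sketch):
     *   pcr_cell_0:  r0 = dalvikPC_0;  b eh_block
     *   pcr_cell_1:  r0 = dalvikPC_1;  b eh_block
     *   ...
     *   eh_block:    r1 = self->jitToInterpEntries.dvmJitToInterpPunt;
     *                blx r1;           // one punt shared by all cells
     */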
However, because they dominate the following @@ -3885,9 +4047,11 @@ static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) /* regArray <- arrayRef */ rlArray = loadValue(cUnit, rlArray, kCoreReg); rlIdxEnd = loadValue(cUnit, rlIdxEnd, kCoreReg); - genRegImmCheck(cUnit, kArmCondEq, rlArray.lowReg, 0, 0, - (ArmLIR *) cUnit->loopAnalysis->branchToPCR); - + if (!dvmIsBitSet(cUnit->regPool->nullCheckedRegs, mir->dalvikInsn.vA)){ + dvmSetBit(cUnit->regPool->nullCheckedRegs, mir->dalvikInsn.vA); + genRegImmCheck(cUnit, kArmCondEq, rlArray.lowReg, 0, 0, + (ArmLIR *) cUnit->loopAnalysis->branchToPCR); + } /* regLength <- len(arrayRef) */ regLength = dvmCompilerAllocTemp(cUnit); loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLength); @@ -3912,6 +4076,11 @@ static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) (ArmLIR *) cUnit->loopAnalysis->branchToPCR); } +__attribute__((weak)) bool genHoistedChecksForCountDownLoopThumb(CompilationUnit *cUnit, + MIR *mir) +{ + return false; +} /* * vA = arrayReg; * vB = idxReg; @@ -3922,6 +4091,9 @@ static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) */ static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) { + if(genHoistedChecksForCountDownLoopThumb(cUnit, mir)) + return; + DecodedInstruction *dInsn = &mir->dalvikInsn; const int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); const int regLength = dvmCompilerAllocTemp(cUnit); @@ -3932,8 +4104,11 @@ static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) /* regArray <- arrayRef */ rlArray = loadValue(cUnit, rlArray, kCoreReg); rlIdxInit = loadValue(cUnit, rlIdxInit, kCoreReg); - genRegImmCheck(cUnit, kArmCondEq, rlArray.lowReg, 0, 0, - (ArmLIR *) cUnit->loopAnalysis->branchToPCR); + if (!dvmIsBitSet(cUnit->regPool->nullCheckedRegs, mir->dalvikInsn.vA)){ + dvmSetBit(cUnit->regPool->nullCheckedRegs, mir->dalvikInsn.vA); + genRegImmCheck(cUnit, kArmCondEq, rlArray.lowReg, 0, 0, + (ArmLIR *) cUnit->loopAnalysis->branchToPCR); + } /* regLength <- len(arrayRef) */ loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLength); @@ -3950,12 +4125,20 @@ static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) (ArmLIR *) cUnit->loopAnalysis->branchToPCR); } +__attribute__((weak)) bool genHoistedLowerBoundCheckThumb(CompilationUnit *cUnit, + MIR *mir) +{ + return false; +} /* * vA = idxReg; * vB = minC; */ static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir) { + if(genHoistedLowerBoundCheckThumb(cUnit, mir)) + return; + DecodedInstruction *dInsn = &mir->dalvikInsn; const int minC = dInsn->vB; RegLocation rlIdx = cUnit->regLocation[mir->dalvikInsn.vA]; @@ -4125,12 +4308,12 @@ static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry, { /* Set up the place holder to reconstruct this Dalvik PC */ ArmLIR *pcrLabel = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); - pcrLabel->opcode = kArmPseudoPCReconstructionCell; + pcrLabel->opcode = kArmPseudoPCReconstructionCellExtended; pcrLabel->operands[0] = (int) (cUnit->method->insns + entry->startOffset); pcrLabel->operands[1] = entry->startOffset; /* Insert the place holder to the growable list */ - dvmInsertGrowableList(&cUnit->pcReconstructionList, (intptr_t) pcrLabel); + dvmInsertGrowableList(&cUnit->pcReconstructionListExtended, (intptr_t)pcrLabel); /* * Next, create two branches - one branch over to the loop body and the @@ -4176,20 +4359,40 @@ static bool selfVerificationPuntOps(MIR *mir) } #endif 
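Both hoisted-check routines above now consult cUnit->regPool->nullCheckedRegs before emitting the array null check, so an array register already proven non-null in this trace is not checked again by a later hoisted block. The test-and-set idiom in isolation (helper name hypothetical; dvmIsBitSet/dvmSetBit are the VM's BitVector API):

    /* True exactly once per Dalvik register per trace; the caller emits
     * the genRegImmCheck() null check only on that first sighting. */
    static bool firstNullCheck(BitVector *nullCheckedRegs, int vReg)
    {
        if (dvmIsBitSet(nullCheckedRegs, vReg))
            return false;           /* already null-checked in this trace */
        dvmSetBit(nullCheckedRegs, vReg);
        return true;
    }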
+__attribute__((weak)) void dvmCompilerCheckStats(CompilationUnit *cUnit) +{ + if (cUnit->printMe){ + ALOGV("extra size in ChainingCells: %d",cUnit->chainingCellExtraSize); + ALOGV("number of extended PCReconstruction cells: %d", + cUnit->pcReconstructionListExtended.numUsed); + } +} + +__attribute__((weak)) void dvmCompilerCheckBlockStats(CompilationUnit *cUnit, BasicBlock *bb) +{ + if(cUnit->printMe){ + ALOGV("Current block:%d",bb->id); + if(bb->taken) + ALOGV("Next taken block:%d", bb->taken->id); + if(bb->fallThrough) + ALOGV("Next fallThrough block:%d",bb->fallThrough->id); + } +} + void dvmCompilerMIR2LIR(CompilationUnit *cUnit) { /* Used to hold the labels of each block */ - ArmLIR *labelList = - (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR) * cUnit->numBlocks, true); + cUnit->labelList = + (void *) dvmCompilerNew(sizeof(ArmLIR) * cUnit->numBlocks, true); + ArmLIR *labelList = (ArmLIR *)(cUnit->labelList); ArmLIR *headLIR = NULL; - GrowableList chainingListByType[kChainingCellGap]; int i; /* * Initialize various types chaining lists. */ for (i = 0; i < kChainingCellGap; i++) { - dvmInitGrowableList(&chainingListByType[i], 2); + dvmInitGrowableList(&(cUnit->chainingListByType[i]), 2); } /* Clear the visited flag for each block */ @@ -4210,6 +4413,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) if (bb->visited == true) continue; labelList[i].operands[0] = bb->startOffset; + bb->blockLabelLIR = (LIR *) &labelList[i]; if (bb->blockType >= kChainingCellGap) { if (bb->isFallThroughFromInvoke == true) { @@ -4227,7 +4431,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) labelList[i].opcode = kArmPseudoEntryBlock; if (bb->firstMIRInsn == NULL) { continue; - } else { + } else if(cUnit->hasHoistedChecks) { setupLoopEntryBlock(cUnit, bb, &labelList[bb->fallThrough->id]); } @@ -4247,7 +4451,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) labelList[i].opcode = kArmPseudoChainingCellNormal; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellNormal], i); + &(cUnit->chainingListByType[kChainingCellNormal]), i); break; case kChainingCellInvokeSingleton: labelList[i].opcode = @@ -4256,7 +4460,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) (int) bb->containingMethod; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellInvokeSingleton], i); + &(cUnit->chainingListByType[kChainingCellInvokeSingleton]), i); break; case kChainingCellInvokePredicted: labelList[i].opcode = @@ -4270,14 +4474,14 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) labelList[i].operands[0] = labelList[i].operands[1]; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellInvokePredicted], i); + &(cUnit->chainingListByType[kChainingCellInvokePredicted]), i); break; case kChainingCellHot: labelList[i].opcode = kArmPseudoChainingCellHot; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellHot], i); + &(cUnit->chainingListByType[kChainingCellHot]), i); break; case kPCReconstruction: /* Make sure exception handling block is next */ @@ -4300,7 +4504,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) kArmPseudoChainingCellBackwardBranch; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellBackwardBranch], + &(cUnit->chainingListByType[kChainingCellBackwardBranch]), i); break; default: @@ -4392,6 +4596,8 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) if (singleStepMe || cUnit->allSingleStep) { notHandled = false; 
genInterpSingleStep(cUnit, mir); + } else if (isInvalidMIR(cUnit, mir)) { + notHandled = false; } else { opcodeCoverage[dalvikOpcode]++; switch (dalvikFormat) { @@ -4486,9 +4692,11 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) break; } } + dvmCompilerCheckBlockStats(cUnit,bb); } - if (bb->blockType == kEntryBlock) { + if (bb->blockType == kEntryBlock && + cUnit->hasHoistedChecks) { dvmCompilerAppendLIR(cUnit, (LIR *) cUnit->loopAnalysis->branchToBody); dvmCompilerAppendLIR(cUnit, @@ -4518,11 +4726,12 @@ gen_fallthrough: } /* Handle the chaining cells in predefined order */ + cUnit->chainingCellExtraSize=0; for (i = 0; i < kChainingCellGap; i++) { size_t j; - int *blockIdList = (int *) chainingListByType[i].elemList; + int *blockIdList = (int *) (cUnit->chainingListByType[i].elemList); - cUnit->numChainingCells[i] = chainingListByType[i].numUsed; + cUnit->numChainingCells[i] = cUnit->chainingListByType[i].numUsed; /* No chaining cells of this type */ if (cUnit->numChainingCells[i] == 0) @@ -4531,7 +4740,7 @@ gen_fallthrough: /* Record the first LIR for a new type of chaining cell */ cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]]; - for (j = 0; j < chainingListByType[i].numUsed; j++) { + for (j = 0; j < cUnit->chainingListByType[i].numUsed; j++) { int blockId = blockIdList[j]; BasicBlock *chainingBlock = (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, @@ -4543,7 +4752,6 @@ gen_fallthrough: /* Insert the pseudo chaining instruction */ dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]); - switch (chainingBlock->blockType) { case kChainingCellNormal: handleNormalChainingCell(cUnit, chainingBlock->startOffset); @@ -4588,6 +4796,8 @@ gen_fallthrough: opReg(cUnit, kOpBlx, r2); } + dvmCompilerCheckStats(cUnit); + dvmCompilerApplyGlobalOptimizations(cUnit); #if defined(WITH_SELF_VERIFICATION) @@ -4694,9 +4904,9 @@ bool dvmCompilerArchInit() int i; for (i = 0; i < kArmLast; i++) { - if (EncodingMap[i].opcode != i) { - ALOGE("Encoding order for %s is wrong: expecting %d, seeing %d", - EncodingMap[i].name, i, EncodingMap[i].opcode); + if (getEncoding((ArmOpcode)i)->opcode != i) { + ALOGE("Encoding order for %s is wrong: expecting %d, seeing %d", + getEncoding((ArmOpcode)i)->name, i, getEncoding((ArmOpcode)i)->opcode); dvmAbort(); // OK to dvmAbort - build error } } @@ -4751,3 +4961,63 @@ void dvmCompilerFlushRegWideImpl(CompilationUnit *cUnit, int rBase, { storeBaseDispWide(cUnit, rBase, displacement, rSrcLo, rSrcHi); } + +LocalOptsFuncMap localOptsFunMap = { + + handleEasyDivide, + handleEasyMultiply, + handleExecuteInline, + handleExtendedMIR, + insertChainingSwitch, + isPopCountLE2, + isPowerOfTwo, + lowestSetBit, + markCard, + setupLoopEntryBlock, + genInterpSingleStep, + setMemRefType, + annotateDalvikRegAccess, + setupResourceMasks, + newLIR0, + newLIR1, + newLIR2, + newLIR3, +#if defined(_ARMV7_A) || defined(_ARMV7_A_NEON) + newLIR4, +#endif + inlinedTarget, + genCheckCommon, + loadWordDisp, + storeWordDisp, + loadValueDirect, + loadValueDirectFixed, + loadValueDirectWide, + loadValueDirectWideFixed, + loadValue, + storeValue, + loadValueWide, + genNullCheck, + genRegRegCheck, + genZeroCheck, + genBoundsCheck, + loadConstantNoClobber, + loadConstant, + storeValueWide, + genSuspendPoll, + storeBaseDispWide, + storeBaseDisp, + loadBaseDispWide, + opRegRegImm, + opRegRegReg, + loadBaseIndexed, + storeBaseIndexed, + dvmCompilerRegClassBySize, + encodeShift, + opRegReg, + opCondBranch, + genIT, + genBarrier, + modifiedImmediate, + genRegImmCheck, +}; + diff 
--git a/vm/compiler/codegen/arm/FP/Thumb2VFP.cpp b/vm/compiler/codegen/arm/FP/Thumb2VFP.cpp index abbf2c9b4..750cbdc19 100644 --- a/vm/compiler/codegen/arm/FP/Thumb2VFP.cpp +++ b/vm/compiler/codegen/arm/FP/Thumb2VFP.cpp @@ -208,6 +208,12 @@ static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir) return false; } +__attribute__((weak)) bool genCmpFPThumb2(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + return true; +} + static bool genCmpFP(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2) { @@ -215,6 +221,9 @@ static bool genCmpFP(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, int defaultResult; RegLocation rlResult; + if(!genCmpFPThumb2(cUnit, mir, rlDest, rlSrc1, rlSrc2)) + return false; + switch(mir->dalvikInsn.opcode) { case OP_CMPL_FLOAT: isDouble = false; diff --git a/vm/compiler/codegen/arm/LocalOptimizations.cpp b/vm/compiler/codegen/arm/LocalOptimizations.cpp index cb35d745d..b3fb5ca0a 100644 --- a/vm/compiler/codegen/arm/LocalOptimizations.cpp +++ b/vm/compiler/codegen/arm/LocalOptimizations.cpp @@ -67,6 +67,23 @@ static void convertMemOpIntoMove(CompilationUnit *cUnit, ArmLIR *origLIR, dvmCompilerInsertLIRAfter((LIR *) origLIR, (LIR *) moveLIR); } +/* placeholder function for extra check on current lir */ +__attribute__((weak)) bool checkSpecialLIR(ArmLIR **lir) +{ + return false; +} + +__attribute__((weak)) void dumpBothLIRs(CompilationUnit *cUnit, + ArmLIR *thisLIR, ArmLIR *checkLIR) +{ + if(cUnit->printMe){ + ALOGD("thisLIR"); + dvmDumpLIRInsn((LIR*)thisLIR,0); + ALOGD("checkLIR"); + dvmDumpLIRInsn((LIR*)checkLIR,0); + } +} + /* * Perform a pass of top-down walk, from the second-last instruction in the * superblock, to eliminate redundant loads and stores. @@ -101,12 +118,13 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, /* Skip non-interesting instructions */ if ((thisLIR->flags.isNop == true) || isPseudoOpcode(thisLIR->opcode) || - !(EncodingMap[thisLIR->opcode].flags & (IS_LOAD | IS_STORE))) { + checkSpecialLIR(&thisLIR) || + !(getEncoding(thisLIR->opcode)->flags & (IS_LOAD | IS_STORE))) { continue; } int nativeRegId = thisLIR->operands[0]; - bool isThisLIRLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD; + bool isThisLIRLoad = getEncoding(thisLIR->opcode)->flags & IS_LOAD; ArmLIR *checkLIR; /* Use the mem mask to determine the rough memory location */ u8 thisMemMask = (thisLIR->useMask | thisLIR->defMask) & ENCODE_MEM; @@ -146,14 +164,14 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, * Potential aliases seen - check the alias relations */ if (checkMemMask != ENCODE_MEM && aliasCondition != 0) { - bool isCheckLIRLoad = EncodingMap[checkLIR->opcode].flags & + bool isCheckLIRLoad = getEncoding(checkLIR->opcode)->flags & IS_LOAD; if (aliasCondition == ENCODE_LITERAL) { /* * Should only see literal loads in the instruction * stream. */ - assert(!(EncodingMap[checkLIR->opcode].flags & + assert(!(getEncoding(checkLIR->opcode)->flags & IS_STORE)); /* Same value && same register type */ if (checkLIR->aliasInfo == thisLIR->aliasInfo && @@ -216,6 +234,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, * case for this so we just stop here to be * conservative. 
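checkSpecialLIR() receives an ArmLIR ** rather than a plain pointer, so a strong override can both declare a LIR off-limits to the optimizer and, if needed, repoint the scan at a substitute instruction; the weak default declines everything. One plausible override shape, keyed off the extraData field this patch adds to ArmLIR (the policy shown is illustrative only):

    /* Hypothetical strong override: any LIR carrying extraData encodes
     * vendor-specific semantics the generic load/store eliminator must
     * not reason about, so flag it as special and leave *lir unchanged. */
    bool checkSpecialLIR(ArmLIR **lir)
    {
        return (*lir)->extraData != NULL;
    }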
*/ + dumpBothLIRs(cUnit,thisLIR, checkLIR); stopHere = true; } } @@ -260,6 +279,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, } else if (!checkLIR->flags.isNop) { sinkDistance++; } + checkSpecialLIR(&checkLIR); } } } @@ -290,7 +310,8 @@ static void applyLoadHoisting(CompilationUnit *cUnit, /* Skip non-interesting instructions */ if ((thisLIR->flags.isNop == true) || isPseudoOpcode(thisLIR->opcode) || - !(EncodingMap[thisLIR->opcode].flags & IS_LOAD)) { + checkSpecialLIR(&thisLIR) || + !(getEncoding(thisLIR->opcode)->flags & IS_LOAD)) { continue; } @@ -324,6 +345,8 @@ static void applyLoadHoisting(CompilationUnit *cUnit, */ if (checkLIR->flags.isNop) continue; + checkSpecialLIR(&checkLIR); + u8 checkMemMask = checkLIR->defMask & ENCODE_MEM; u8 aliasCondition = stopUseAllMask & checkMemMask; stopHere = false; @@ -389,7 +412,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, ArmLIR *depLIR = prevInstList[nextSlot-1]; /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */ if (!isPseudoOpcode(depLIR->opcode) && - (EncodingMap[depLIR->opcode].flags & IS_LOAD)) { + (getEncoding(depLIR->opcode)->flags & IS_LOAD)) { firstSlot -= LDLD_DISTANCE; } /* @@ -409,7 +432,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, * If the first instruction is a load, don't hoist anything * above it since it is unlikely to be beneficial. */ - if (EncodingMap[curLIR->opcode].flags & IS_LOAD) continue; + if (getEncoding(curLIR->opcode)->flags & IS_LOAD) continue; /* * Need to unconditionally break here even if the hoisted * distance is greater than LD_LATENCY (ie more than enough @@ -429,7 +452,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, * the remaining instructions are less than LD_LATENCY. */ if (((curLIR->useMask & prevLIR->defMask) && - (EncodingMap[prevLIR->opcode].flags & IS_LOAD)) || + (getEncoding(prevLIR->opcode)->flags & IS_LOAD)) || (slot < LD_LATENCY)) { break; } @@ -453,6 +476,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, } } +#ifndef WITH_QC_PERF /* * Find all lsl/lsr and add that can be replaced with a * combined lsl/lsr + add @@ -582,6 +606,7 @@ static void applyShiftArithmeticOpts(CompilationUnit *cUnit, } } } +#endif void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR, LIR *tailLIR) @@ -593,7 +618,9 @@ void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR, if (!(gDvmJit.disableOpt & (1 << kLoadHoisting))) { applyLoadHoisting(cUnit, (ArmLIR *) headLIR, (ArmLIR *) tailLIR); } +#ifndef WITH_QC_PERF if (!(gDvmJit.disableOpt & (1 << kShiftArithmetic))) { applyShiftArithmeticOpts(cUnit, (ArmLIR *) headLIR, (ArmLIR* ) tailLIR); } +#endif } diff --git a/vm/compiler/codegen/arm/Thumb/Gen.cpp b/vm/compiler/codegen/arm/Thumb/Gen.cpp index 622f47eff..52d492c32 100644 --- a/vm/compiler/codegen/arm/Thumb/Gen.cpp +++ b/vm/compiler/codegen/arm/Thumb/Gen.cpp @@ -123,7 +123,7 @@ static bool partialOverlap(int sreg1, int sreg2) static void genLong3Addr(CompilationUnit *cUnit, MIR *mir, OpKind firstOp, OpKind secondOp, RegLocation rlDest, - RegLocation rlSrc1, RegLocation rlSrc2) + RegLocation rlSrc1, RegLocation rlSrc2, bool setCCode) { RegLocation rlResult; if (partialOverlap(rlSrc1.sRegLow,rlSrc2.sRegLow) || @@ -275,6 +275,7 @@ static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, opRegRegImm(cUnit, kOpMul, rlResult.lowReg, rlSrc.lowReg, lit); } +#ifndef WITH_QC_PERF static void genMultiplyByShiftAndReverseSubtract(CompilationUnit *cUnit, RegLocation rlSrc, RegLocation rlResult, int lit) 
{ @@ -282,3 +283,145 @@ static void genMultiplyByShiftAndReverseSubtract(CompilationUnit *cUnit, opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lit); opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg); } + +/* + * Generate array load + */ +static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlDest, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + RegLocation rlResult; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + int regPtr; + + /* null object? */ + ArmLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, + rlArray.lowReg, mir->offset, NULL); + } + + regPtr = dvmCompilerAllocTemp(cUnit); + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + /* regPtr -> array data */ + opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } else { + /* regPtr -> array data */ + opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset); + } + if ((size == kLong) || (size == kDouble)) { + if (scale) { + int rNewIndex = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale); + opRegReg(cUnit, kOpAdd, regPtr, rNewIndex); + dvmCompilerFreeTemp(cUnit, rNewIndex); + } else { + opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg); + } + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + + HEAP_ACCESS_SHADOW(true); + loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + storeValueWide(cUnit, rlDest, rlResult); + } else { + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + + HEAP_ACCESS_SHADOW(true); + loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg, + scale, size); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + storeValue(cUnit, rlDest, rlResult); + } +} + +/* + * Generate array store + * + */ +static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlSrc, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + + int regPtr; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + + if (dvmCompilerIsTemp(cUnit, rlArray.lowReg)) { + dvmCompilerClobber(cUnit, rlArray.lowReg); + regPtr = rlArray.lowReg; + } else { + regPtr = dvmCompilerAllocTemp(cUnit); + genRegCopy(cUnit, regPtr, rlArray.lowReg); + } + + /* null object? */ + ArmLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, + mir->offset, NULL); + } + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + //NOTE: max live temps(4) here. 
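+ /* In plain C, the check emitted here is (sketch, T = element type):
+  *   if ((u4)index >= (u4)array->length) goto reconstruction cell;
+  * one unsigned compare covers both index < 0 and index >= length,
+  * before the *(T *)(array + dataOffset + (index << scale)) access. */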
+ /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + /* regPtr -> array data */ + opRegImm(cUnit, kOpAdd, regPtr, dataOffset); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } else { + /* regPtr -> array data */ + opRegImm(cUnit, kOpAdd, regPtr, dataOffset); + } + /* at this point, regPtr points to array, 2 live temps */ + if ((size == kLong) || (size == kDouble)) { + //TODO: need specific wide routine that can handle fp regs + if (scale) { + int rNewIndex = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale); + opRegReg(cUnit, kOpAdd, regPtr, rNewIndex); + } else { + opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg); + } + rlSrc = loadValueWide(cUnit, rlSrc, regClass); + + HEAP_ACCESS_SHADOW(true); + storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + } else { + rlSrc = loadValue(cUnit, rlSrc, regClass); + + HEAP_ACCESS_SHADOW(true); + storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg, + scale, size); + HEAP_ACCESS_SHADOW(false); + } +} +#endif diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.cpp b/vm/compiler/codegen/arm/Thumb2/Factory.cpp index b9265e823..f1fa19d29 100644 --- a/vm/compiler/codegen/arm/Thumb2/Factory.cpp +++ b/vm/compiler/codegen/arm/Thumb2/Factory.cpp @@ -22,6 +22,9 @@ * */ +#define SET_CCODE (cUnit->setCCode = true) /* codegen changes CCode */ +#define UNSET_CCODE (cUnit->setCCode = false) /* codegen does not change CCode */ + static int coreTemps[] = {r0, r1, r2, r3, r4PC, r7, r8, r9, r10, r11, r12}; static int fpTemps[] = {fr16, fr17, fr18, fr19, fr20, fr21, fr22, fr23, fr24, fr25, fr26, fr27, fr28, fr29, fr30, fr31}; @@ -150,10 +153,6 @@ static ArmLIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest, return loadFPConstantValue(cUnit, rDest, value); } - /* See if the value can be constructed cheaply */ - if (LOWREG(rDest) && (value >= 0) && (value <= 255)) { - return newLIR2(cUnit, kThumbMovImm, rDest, value); - } /* Check Modified immediate special cases */ modImm = modifiedImmediate(value); if (modImm >= 0) { @@ -303,9 +302,19 @@ static ArmLIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc) return newLIR1(cUnit, opcode, rDestSrc); } +__attribute__((weak)) ArmLIR *opRegRegShiftThumb2(CompilationUnit *cUnit, OpKind op, + int rDestSrc1, int rSrc2, int shift) +{ + return NULL; +} + static ArmLIR *opRegRegShift(CompilationUnit *cUnit, OpKind op, int rDestSrc1, int rSrc2, int shift) { + ArmLIR *res; + if((res = opRegRegShiftThumb2(cUnit, op, rDestSrc1, rSrc2, shift))) + return res; + bool thumbForm = ((shift == 0) && LOWREG(rDestSrc1) && LOWREG(rSrc2)); ArmOpcode opcode = kThumbBkpt; switch (op) { @@ -353,7 +362,7 @@ static ArmLIR *opRegRegShift(CompilationUnit *cUnit, OpKind op, int rDestSrc1, opcode = (thumbForm) ? kThumbMul : kThumb2MulRRR; break; case kOpMvn: - opcode = (thumbForm) ? kThumbMvn : kThumb2MnvRR; + opcode = (thumbForm) ? 
kThumbMvn : kThumb2MvnRR; break; case kOpNeg: assert(shift == 0); @@ -404,14 +413,14 @@ static ArmLIR *opRegRegShift(CompilationUnit *cUnit, OpKind op, int rDestSrc1, break; } assert(opcode >= 0); - if (EncodingMap[opcode].flags & IS_BINARY_OP) + if (getEncoding(opcode)->flags & IS_BINARY_OP) return newLIR2(cUnit, opcode, rDestSrc1, rSrc2); - else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { - if (EncodingMap[opcode].fieldLoc[2].kind == kFmtShift) + else if (getEncoding(opcode)->flags & IS_TERTIARY_OP) { + if (getEncoding(opcode)->fieldLoc[2].kind == kFmtShift) return newLIR3(cUnit, opcode, rDestSrc1, rSrc2, shift); else return newLIR3(cUnit, opcode, rDestSrc1, rDestSrc1, rSrc2); - } else if (EncodingMap[opcode].flags & IS_QUAD_OP) + } else if (getEncoding(opcode)->flags & IS_QUAD_OP) return newLIR4(cUnit, opcode, rDestSrc1, rDestSrc1, rSrc2, shift); else { assert(0); @@ -425,9 +434,20 @@ static ArmLIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1, return opRegRegShift(cUnit, op, rDestSrc1, rSrc2, 0); } +__attribute__((weak)) ArmLIR *opRegRegRegShiftThumb2(CompilationUnit *cUnit, OpKind op, + int rDest, int rSrc1, int rSrc2, int shift) +{ + return NULL; +} + static ArmLIR *opRegRegRegShift(CompilationUnit *cUnit, OpKind op, int rDest, int rSrc1, int rSrc2, int shift) { + ArmLIR *res; + + if((res = opRegRegRegShiftThumb2(cUnit, op, rDest, rSrc1, rSrc2, shift))) + return res; + ArmOpcode opcode = kThumbBkpt; bool thumbForm = (shift == 0) && LOWREG(rDest) && LOWREG(rSrc1) && LOWREG(rSrc2); @@ -481,10 +501,10 @@ static ArmLIR *opRegRegRegShift(CompilationUnit *cUnit, OpKind op, break; } assert(opcode >= 0); - if (EncodingMap[opcode].flags & IS_QUAD_OP) + if (getEncoding(opcode)->flags & IS_QUAD_OP) return newLIR4(cUnit, opcode, rDest, rSrc1, rSrc2, shift); else { - assert(EncodingMap[opcode].flags & IS_TERTIARY_OP); + assert(getEncoding(opcode)->flags & IS_TERTIARY_OP); return newLIR3(cUnit, opcode, rDest, rSrc1, rSrc2); } } @@ -495,10 +515,20 @@ static ArmLIR *opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest, return opRegRegRegShift(cUnit, op, rDest, rSrc1, rSrc2, 0); } +__attribute__((weak)) ArmLIR *opRegRegImmThumb2(CompilationUnit *cUnit, OpKind op, int rDest, + int rSrc1, int value) +{ + return NULL; +} + static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, int rSrc1, int value) { ArmLIR *res; + + if((res = opRegRegImmThumb2(cUnit, op, rDest, rSrc1, value))) + return res; + bool neg = (value < 0); int absValue = (neg) ? -value : value; ArmOpcode opcode = kThumbBkpt; @@ -608,7 +638,7 @@ static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, } else { int rScratch = dvmCompilerAllocTemp(cUnit); loadConstant(cUnit, rScratch, value); - if (EncodingMap[altOpcode].flags & IS_QUAD_OP) + if (getEncoding(altOpcode)->flags & IS_QUAD_OP) res = newLIR4(cUnit, altOpcode, rDest, rSrc1, rScratch, 0); else res = newLIR3(cUnit, altOpcode, rDest, rSrc1, rScratch); @@ -617,10 +647,21 @@ static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, } } +__attribute__((weak)) ArmLIR *opRegImmThumb2(CompilationUnit *cUnit, OpKind op, int rDestSrc1, + int value) +{ + return NULL; +} + /* Handle Thumb-only variants here - otherwise punt to opRegRegImm */ static ArmLIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1, int value) { + ArmLIR *res; + + if((res = opRegImmThumb2(cUnit, op, rDestSrc1, value))) + return res; + bool neg = (value < 0); int absValue = (neg) ? 
-value : value; bool shortForm = (((absValue & 0xff) == absValue) && LOWREG(rDestSrc1)); @@ -1153,11 +1194,25 @@ static void storePair(CompilationUnit *cUnit, int base, int lowReg, int highReg) storeBaseDispWide(cUnit, base, 0, lowReg, highReg); } +#ifndef WITH_QC_PERF +static void storePair(CompilationUnit *cUnit, int base, int displacement, int lowReg, int highReg) +{ + storeBaseDispWide(cUnit, base, displacement, lowReg, highReg); +} +#endif + static void loadPair(CompilationUnit *cUnit, int base, int lowReg, int highReg) { loadBaseDispWide(cUnit, NULL, base, 0, lowReg, highReg, INVALID_SREG); } +#ifndef WITH_QC_PERF +static void loadPair(CompilationUnit *cUnit, int base, int displacement, int lowReg, int highReg) +{ + loadBaseDispWide(cUnit, NULL, base, displacement, lowReg, highReg, INVALID_SREG); +} +#endif + /* * Generate a register comparison to an immediate and branch. Caller * is responsible for setting branch target field. @@ -1223,7 +1278,7 @@ static ArmLIR* genRegCopyNoInsert(CompilationUnit *cUnit, int rDest, int rSrc) return fpRegCopy(cUnit, rDest, rSrc); res = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); if (LOWREG(rDest) && LOWREG(rSrc)) - opcode = kThumbMovRR; + opcode = kThumb2MovRR; else if (!LOWREG(rDest) && !LOWREG(rSrc)) opcode = kThumbMovRR_H2H; else if (LOWREG(rDest)) diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.cpp b/vm/compiler/codegen/arm/Thumb2/Gen.cpp index df37478e3..a2adc6f7e 100644 --- a/vm/compiler/codegen/arm/Thumb2/Gen.cpp +++ b/vm/compiler/codegen/arm/Thumb2/Gen.cpp @@ -124,13 +124,15 @@ static void genMulLong(CompilationUnit *cUnit, RegLocation rlDest, static void genLong3Addr(CompilationUnit *cUnit, MIR *mir, OpKind firstOp, OpKind secondOp, RegLocation rlDest, - RegLocation rlSrc1, RegLocation rlSrc2) + RegLocation rlSrc1, RegLocation rlSrc2, bool setCCode) { RegLocation rlResult; rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg); rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + if(setCCode) SET_CCODE; opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg); + if(setCCode) UNSET_CCODE; opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg, rlSrc2.highReg); storeValueWide(cUnit, rlDest, rlResult); @@ -307,9 +309,18 @@ static void genMonitorExit(CompilationUnit *cUnit, MIR *mir) // Is lock unheld on lock or held by us (==threadId) on unlock? opRegRegImm(cUnit, kOpAnd, r7, r2, (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)); +#ifdef WITH_QC_PERF + opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner +#endif newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1); +#ifndef WITH_QC_PERF opRegRegRegShift(cUnit, kOpSub, r2, r2, r3, encodeShift(kArmLsl, LW_LOCK_OWNER_SHIFT)); // Align owner +#else + SET_CCODE; + opRegReg(cUnit, kOpSub, r2, r3); + UNSET_CCODE; +#endif hopBranch = opCondBranch(cUnit, kArmCondNe); dvmCompilerGenMemBarrier(cUnit, kSY); storeWordDisp(cUnit, r1, offsetof(Object, lock), r7); @@ -351,6 +362,13 @@ static void genMonitor(CompilationUnit *cUnit, MIR *mir) genMonitorExit(cUnit, mir); } +__attribute__((weak)) bool genCmpLongThumb2(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2) +{ + return false; +} + /* * 64-bit 3way compare function. 
* mov r7, #-1 @@ -370,6 +388,9 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2) { + if(genCmpLongThumb2(cUnit, mir, rlDest, rlSrc1, rlSrc2)) + return; + RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change ArmLIR *target1; ArmLIR *target2; @@ -380,7 +401,9 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir, opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg); ArmLIR *branch1 = opCondBranch(cUnit, kArmCondLt); ArmLIR *branch2 = opCondBranch(cUnit, kArmCondGt); + SET_CCODE; opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg); + UNSET_CCODE; ArmLIR *branch3 = opCondBranch(cUnit, kArmCondEq); genIT(cUnit, kArmCondHi, "E"); @@ -453,9 +476,154 @@ static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, } } +#ifndef WITH_QC_PERF static void genMultiplyByShiftAndReverseSubtract(CompilationUnit *cUnit, RegLocation rlSrc, RegLocation rlResult, int lit) { newLIR4(cUnit, kThumb2RsbRRR, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg, encodeShift(kArmLsl, lit)); } + +/* + * Generate array load. + * For wide array access using scale, combine add with shift. + * When using offset, use ldr instruction with offset capabilities. + */ +static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlDest, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + RegLocation rlResult; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + int regPtr; + + /* null object? */ + ArmLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, + rlArray.lowReg, mir->offset, NULL); + } + + regPtr = dvmCompilerAllocTemp(cUnit); + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } + if ((size == kLong) || (size == kDouble)) { + int rNewIndex = dvmCompilerAllocTemp(cUnit); + if (scale) { + /* Combine add with shift */ + opRegRegRegShift(cUnit, kOpAdd, rNewIndex, rlArray.lowReg, + rlIndex.lowReg, encodeShift(kArmLsl, scale)); + } else { + opRegRegReg(cUnit, kOpAdd, rNewIndex, regPtr, rlIndex.lowReg); + } + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + + HEAP_ACCESS_SHADOW(true); + /* Use data offset */ + loadPair(cUnit, rNewIndex, dataOffset, rlResult.lowReg, rlResult.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, rNewIndex); + dvmCompilerFreeTemp(cUnit, regPtr); + storeValueWide(cUnit, rlDest, rlResult); + } else { + /* regPtr -> array data */ + opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset); + + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + + HEAP_ACCESS_SHADOW(true); + loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg, + scale, size); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + storeValue(cUnit, rlDest, rlResult); + } +} + +/* + * Generate array store. + * For wide array access using scale, combine add with shift. + * When using offset, use str instruction with offset capabilities. 
+ */ +static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlSrc, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + + int regPtr; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + + if (dvmCompilerIsTemp(cUnit, rlArray.lowReg)) { + dvmCompilerClobber(cUnit, rlArray.lowReg); + regPtr = rlArray.lowReg; + } else { + regPtr = dvmCompilerAllocTemp(cUnit); + genRegCopy(cUnit, regPtr, rlArray.lowReg); + } + + /* null object? */ + ArmLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, + mir->offset, NULL); + } + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + //NOTE: max live temps(4) here. + /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } + /* at this point, regPtr points to array, 2 live temps */ + if ((size == kLong) || (size == kDouble)) { + //TODO: need specific wide routine that can handle fp regs + int rNewIndex = dvmCompilerAllocTemp(cUnit); + if (scale) { + opRegRegRegShift(cUnit, kOpAdd, rNewIndex, rlArray.lowReg, + rlIndex.lowReg, encodeShift(kArmLsl, scale)); + } else { + opRegRegReg(cUnit, kOpAdd, rNewIndex, regPtr, rlIndex.lowReg); + } + rlSrc = loadValueWide(cUnit, rlSrc, regClass); + + HEAP_ACCESS_SHADOW(true); + storePair(cUnit, rNewIndex, dataOffset, rlSrc.lowReg, rlSrc.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, rNewIndex); + dvmCompilerFreeTemp(cUnit, regPtr); + } else { + /* regPtr -> array data */ + opRegImm(cUnit, kOpAdd, regPtr, dataOffset); + + rlSrc = loadValue(cUnit, rlSrc, regClass); + + HEAP_ACCESS_SHADOW(true); + storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg, + scale, size); + HEAP_ACCESS_SHADOW(false); + } +} +#endif diff --git a/vm/mterp/armv5te/OP_EXECUTE_INLINE.S b/vm/mterp/armv5te/OP_EXECUTE_INLINE.S index ca71de198..7a268dc21 100644 --- a/vm/mterp/armv5te/OP_EXECUTE_INLINE.S +++ b/vm/mterp/armv5te/OP_EXECUTE_INLINE.S @@ -46,6 +46,35 @@ * interleave a little better. Increases code size. 
*/ .L${opcode}_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 5-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .L${opcode}_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .L${opcode}_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.L${opcode}_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -63,6 +92,7 @@ 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. diff --git a/vm/mterp/armv5te/OP_EXECUTE_INLINE_RANGE.S b/vm/mterp/armv5te/OP_EXECUTE_INLINE_RANGE.S index d9e35b85f..52951812e 100644 --- a/vm/mterp/armv5te/OP_EXECUTE_INLINE_RANGE.S +++ b/vm/mterp/armv5te/OP_EXECUTE_INLINE_RANGE.S @@ -19,11 +19,11 @@ bne .L${opcode}_debugmode @ yes - take slow path .L${opcode}_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .L${opcode}_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -38,10 +38,17 @@ * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .L${opcode}_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 7-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .L${opcode}_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .L${opcode}_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .L${opcode}_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -56,6 +63,43 @@ ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.L${opcode}_load_arg6: + add ip, r9, #6 @ base+6 + GET_VREG(r3, ip) @ r3<- vBase[6] + str r3, [sp, #12] + b 7b + +.L${opcode}_load_arg5: + add ip, r9, #5 @ base+5 + GET_VREG(r3, ip) @ r3<- vBase[5] + str r3, [sp, #8] + b 6b + +.L${opcode}_load_arg4: + add ip, r9, #4 @ base+4 + GET_VREG(r3, ip) @ r3<- vBase[4] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<-
vBase[0] +0: + ldr r9, .L${opcode}_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. diff --git a/vm/mterp/c/OP_EXECUTE_INLINE.cpp b/vm/mterp/c/OP_EXECUTE_INLINE.cpp index 288ccc906..4655ae89b 100644 --- a/vm/mterp/c/OP_EXECUTE_INLINE.cpp +++ b/vm/mterp/c/OP_EXECUTE_INLINE.cpp @@ -1,5 +1,59 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4; + arg0 = arg1 = arg2 = arg3 = arg4 = 0; + + EXPORT_PC(); + + vsrc1 = INST_B(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* 0-4 register indices */ + ILOGV("|execute-inline args=%d @%d {regs=0x%04x}", + vsrc1, ref, vdst); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 5); + + switch (vsrc1) { + case 5: + arg4 = GET_REGISTER(INST_A(inst)); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst >> 12); + /* fall through */ + case 3: + arg2 = GET_REGISTER((vdst & 0x0f00) >> 8); + /* fall through */ + case 2: + arg1 = GET_REGISTER((vdst & 0x00f0) >> 4); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst & 0x0f); + /* fall through */ + default: // case 0 + ; + } + + if( vsrc1 == 5 ) { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp5Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp5Std(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } + } else { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp4Dbg(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } + } + +#else //ifdef INLINE_ARG_EXPANDED /* * This has the same form as other method calls, but we ignore * the 5th argument (vA). 
This is chiefly because the first four @@ -54,6 +108,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/mterp/c/OP_EXECUTE_INLINE_RANGE.cpp b/vm/mterp/c/OP_EXECUTE_INLINE_RANGE.cpp index 467f0e90e..48891d1bb 100644 --- a/vm/mterp/c/OP_EXECUTE_INLINE_RANGE.cpp +++ b/vm/mterp/c/OP_EXECUTE_INLINE_RANGE.cpp @@ -1,5 +1,56 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4, arg5, arg6; + arg0 = arg1 = arg2 = arg3 = arg4 = arg5 = 0; /* placate gcc */ + arg6 = 0; + + + EXPORT_PC(); + + vsrc1 = INST_AA(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* range base */ + ALOGE("|execute-inline-range args=%d @%d {regs=v%d-v%d}", + vsrc1, ref, vdst, vdst+vsrc1-1); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 7); + + switch (vsrc1) { + case 7: + arg6 = GET_REGISTER(vdst+6); + /* fall through */ + case 6: + arg5 = GET_REGISTER(vdst+5); + /* fall through */ + case 5: + arg4 = GET_REGISTER(vdst+4); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst+3); + /* fall through */ + case 3: + arg2 = GET_REGISTER(vdst+2); + /* fall through */ + case 2: + arg1 = GET_REGISTER(vdst+1); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst+0); + /* fall through */ + default: // case 0 + ; + } + + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp7Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp7Std(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } +#else //ifdef INLINE_ARG_EXPANDED u4 arg0, arg1, arg2, arg3; arg0 = arg1 = arg2 = arg3 = 0; /* placate gcc */ @@ -38,6 +89,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/mterp/common/asm-constants.h b/vm/mterp/common/asm-constants.h index 80b36fc04..406ee78cf 100644 --- a/vm/mterp/common/asm-constants.h +++ b/vm/mterp/common/asm-constants.h @@ -211,6 +211,8 @@ MTERP_OFFSET(offObject_lock, Object, lock, 4) /* Lock shape */ MTERP_CONSTANT(LW_LOCK_OWNER_SHIFT, 3) MTERP_CONSTANT(LW_HASH_STATE_SHIFT, 1) +MTERP_CONSTANT(LW_HASH_STATE_SIZE, 2) +MTERP_CONSTANT(LW_HASH_STATE_ABS_MASK, 0x6) /* ArrayObject fields */ MTERP_OFFSET(offArrayObject_length, ArrayObject, length, 8) diff --git a/vm/mterp/out/InterpAsm-armv5te-vfp.S b/vm/mterp/out/InterpAsm-armv5te-vfp.S index a173c7226..c9ee0c237 100644 --- a/vm/mterp/out/InterpAsm-armv5te-vfp.S +++ b/vm/mterp/out/InterpAsm-armv5te-vfp.S @@ -7342,11 +7342,11 @@ dalvik_inst: bne .LOP_EXECUTE_INLINE_RANGE_debugmode @ yes - take slow path .LOP_EXECUTE_INLINE_RANGE_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .LOP_EXECUTE_INLINE_RANGE_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception 
FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -9516,6 +9516,35 @@ d2l_doconv: * interleave a little better. Increases code size. */ .LOP_EXECUTE_INLINE_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 4-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .LOP_EXECUTE_INLINE_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.LOP_EXECUTE_INLINE_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -9533,6 +9562,7 @@ d2l_doconv: 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. @@ -9577,10 +9607,17 @@ d2l_doconv: * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .LOP_EXECUTE_INLINE_RANGE_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 4-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .LOP_EXECUTE_INLINE_RANGE_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .LOP_EXECUTE_INLINE_RANGE_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_RANGE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -9595,6 +9632,43 @@ d2l_doconv: ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.LOP_EXECUTE_INLINE_RANGE_load_arg6: + add ip, r9, #6 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #12] + b 7b + +.LOP_EXECUTE_INLINE_RANGE_load_arg5: + add ip, r9, #5 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #8] + b 6b + +.LOP_EXECUTE_INLINE_RANGE_load_arg4: + add ip, r9, #4 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<- vBase[0] +0: + ldr r9, .LOP_EXECUTE_INLINE_RANGE_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. 
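The "sub sp, sp, #8" to "sub sp, sp, #16" changes above follow from the ARM AAPCS: only the first four arguments travel in r0-r3, so &self->retval and every argument past the fourth must be spilled to the stack, and sp has to stay 8-byte aligned at the call. A plausible C-level shape of the widened handler entry point, with the stack slots the new .L..._load_argN stubs fill in (a sketch only; the exact extended signature is an assumption, not quoted from the headers):

    /* Hypothetical extended inline-op handler (sketch, not verbatim).
     * Under AAPCS on 32-bit ARM: arg0..arg3 -> r0..r3, the rest on the stack:
     *   [sp]      pResult  (stored by "str r1, [sp]" in _resume)
     *   [sp,#4]   arg4     (stored by .L..._load_arg4)
     *   [sp,#8]   arg5     (stored by .L..._load_arg5)
     *   [sp,#12]  arg6     (stored by .L..._load_arg6)
     * 16 bytes of outgoing arguments, which also preserves 8-byte alignment. */
    bool inlineOp7(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult,
                   u4 arg4, u4 arg5, u4 arg6);

The portable C interpreter reaches the same functionality through the dvmPerformInlineOp5Std/Dbg and dvmPerformInlineOp7Std/Dbg wrappers, which additionally carry the inline-op index "ref" as a parameter.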
diff --git a/vm/mterp/out/InterpAsm-armv5te.S b/vm/mterp/out/InterpAsm-armv5te.S index 7b6c9d18f..5ba889d7c 100644 --- a/vm/mterp/out/InterpAsm-armv5te.S +++ b/vm/mterp/out/InterpAsm-armv5te.S @@ -7664,11 +7664,11 @@ d2i_doconv: bne .LOP_EXECUTE_INLINE_RANGE_debugmode @ yes - take slow path .LOP_EXECUTE_INLINE_RANGE_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .LOP_EXECUTE_INLINE_RANGE_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -9974,6 +9974,35 @@ d2l_doconv: * interleave a little better. Increases code size. */ .LOP_EXECUTE_INLINE_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 4-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .LOP_EXECUTE_INLINE_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.LOP_EXECUTE_INLINE_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -9991,6 +10020,7 @@ d2l_doconv: 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. 
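For readers more comfortable in C, the INLINE_ARG_EXPANDED ladder above is a computed goto: the rsb/add-pc pair jumps into a table of 8-byte slots so that exactly argc loads execute, highest-numbered argument first. A minimal C++ rendering using the GNU labels-as-values extension (g++/clang); all names here are illustrative, not Dalvik's:

    #include <cstdint>

    // vreg stands in for the Dalvik frame (rFP); fedc is the FEDC code unit.
    static void loadArgsDemo(const uint32_t* vreg, uint16_t fedc,
                             unsigned vG, int argc, uint32_t args[5]) {
        static void* const slot[6] = { &&a0, &&a1, &&a2, &&a3, &&a4, &&a5 };
        goto *slot[argc];                     // asm: add pc, pc, r0, lsl #3
    a5: args[4] = vreg[vG & 0xf];             // 5th argument (vG), fall through
    a4: args[3] = vreg[(fedc >> 12) & 0xf];   // vF
    a3: args[2] = vreg[(fedc >> 8) & 0xf];    // vE
    a2: args[1] = vreg[(fedc >> 4) & 0xf];    // vD
    a1: args[0] = vreg[fedc & 0xf];           // vC
    a0: ;                                     // zero arguments: nothing to load
    }

Unlike this sketch, the assembly cannot simply fall through for the fifth argument: vG lives in a different code unit than FEDC, and it goes to the stack rather than a register. That is why the widened ladders detour through the .L..._load_argN stubs and then branch back into the original four-register sequence.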
@@ -10035,10 +10065,17 @@ d2l_doconv: * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .LOP_EXECUTE_INLINE_RANGE_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 4-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .LOP_EXECUTE_INLINE_RANGE_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .LOP_EXECUTE_INLINE_RANGE_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_RANGE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -10053,6 +10090,43 @@ d2l_doconv: ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.LOP_EXECUTE_INLINE_RANGE_load_arg6: + add ip, r9, #6 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #12] + b 7b + +.LOP_EXECUTE_INLINE_RANGE_load_arg5: + add ip, r9, #5 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #8] + b 6b + +.LOP_EXECUTE_INLINE_RANGE_load_arg4: + add ip, r9, #4 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<- vBase[0] +0: + ldr r9, .LOP_EXECUTE_INLINE_RANGE_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. diff --git a/vm/mterp/out/InterpAsm-armv7-a-neon.S b/vm/mterp/out/InterpAsm-armv7-a-neon.S index c3419c230..7d3b08f46 100644 --- a/vm/mterp/out/InterpAsm-armv7-a-neon.S +++ b/vm/mterp/out/InterpAsm-armv7-a-neon.S @@ -7300,11 +7300,11 @@ dalvik_inst: bne .LOP_EXECUTE_INLINE_RANGE_debugmode @ yes - take slow path .LOP_EXECUTE_INLINE_RANGE_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .LOP_EXECUTE_INLINE_RANGE_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -9453,6 +9453,35 @@ d2l_doconv: * interleave a little better. Increases code size. 
*/ .LOP_EXECUTE_INLINE_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 4-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .LOP_EXECUTE_INLINE_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.LOP_EXECUTE_INLINE_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -9470,6 +9499,7 @@ d2l_doconv: 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. @@ -9514,10 +9544,17 @@ d2l_doconv: * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .LOP_EXECUTE_INLINE_RANGE_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 4-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .LOP_EXECUTE_INLINE_RANGE_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .LOP_EXECUTE_INLINE_RANGE_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_RANGE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -9532,6 +9569,43 @@ d2l_doconv: ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.LOP_EXECUTE_INLINE_RANGE_load_arg6: + add ip, r9, #6 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #12] + b 7b + +.LOP_EXECUTE_INLINE_RANGE_load_arg5: + add ip, r9, #5 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #8] + b 6b + +.LOP_EXECUTE_INLINE_RANGE_load_arg4: + add ip, r9, #4 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<- vBase[0] +0: + ldr r9, .LOP_EXECUTE_INLINE_RANGE_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. 
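The "ldr pc, [..., r10, lsl #4]" dispatch works because the intrinsics table scales by 16: each entry is four 32-bit words with the code pointer first, matching the InlineOperation record from vm/InlineNative.h (reproduced here from memory as a reference sketch; verify against the header):

    #include <cstdint>
    typedef uint32_t u4;
    union JValue;   // Dalvik's polymorphic return slot

    // One entry per recognized intrinsic. On 32-bit ARM the four pointers
    // make each entry 16 bytes ("sizeof=16"), and func sits at offset 0,
    // so base + (index << 4) dereferences straight to the code pointer.
    struct InlineOperation {
        bool (*func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult);
        const char* classDescriptor;
        const char* methodName;
        const char* methodSignature;
    };

    // In C terms the dispatch amounts to:
    //   gDvmInlineOpsTable[ref].func(arg0, arg1, arg2, arg3, &retval);

r10 holds the inline-op index ("ref") on entry to these continue stubs.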
diff --git a/vm/mterp/out/InterpAsm-armv7-a.S b/vm/mterp/out/InterpAsm-armv7-a.S index 254224566..199d59acb 100644 --- a/vm/mterp/out/InterpAsm-armv7-a.S +++ b/vm/mterp/out/InterpAsm-armv7-a.S @@ -7300,11 +7300,11 @@ dalvik_inst: bne .LOP_EXECUTE_INLINE_RANGE_debugmode @ yes - take slow path .LOP_EXECUTE_INLINE_RANGE_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .LOP_EXECUTE_INLINE_RANGE_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -9453,6 +9453,35 @@ d2l_doconv: * interleave a little better. Increases code size. */ .LOP_EXECUTE_INLINE_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 4-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .LOP_EXECUTE_INLINE_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.LOP_EXECUTE_INLINE_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -9470,6 +9499,7 @@ d2l_doconv: 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. 
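A note on the slot arithmetic these hunks rely on: reading pc on ARM yields the address of the current instruction plus 8, so "add pc, pc, r0, lsl #3" lands r0 slots past the instruction that follows the never-executed "bl common_abort", and the interleaved bl lines pad every branch out to a full two-instruction slot while trapping any out-of-range index. In C terms (a sketch; maxArgs is 5 for execute-inline and 7 for the range form, so the "@ r0<- 4-r0" comments inherited from the four-argument ladder understate the new rsb immediates):

    #include <cstdint>

    // Address selected by "add pc, pc, r0, lsl #3" with r0 = maxArgs - argc.
    uintptr_t computedGotoTarget(uintptr_t addPcAddr, int argc, int maxArgs) {
        int slotIndex = maxArgs - argc;               // asm: rsb r0, r0, #maxArgs
        uintptr_t firstSlot = addPcAddr + 8;          // pc reads as insn + 8
        return firstSlot + (uintptr_t)slotIndex * 8;  // 8 bytes (2 insns) per slot
    }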
@@ -9514,10 +9544,17 @@ d2l_doconv: * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .LOP_EXECUTE_INLINE_RANGE_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 4-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .LOP_EXECUTE_INLINE_RANGE_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .LOP_EXECUTE_INLINE_RANGE_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_RANGE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -9532,6 +9569,43 @@ d2l_doconv: ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.LOP_EXECUTE_INLINE_RANGE_load_arg6: + add ip, r9, #6 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #12] + b 7b + +.LOP_EXECUTE_INLINE_RANGE_load_arg5: + add ip, r9, #5 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #8] + b 6b + +.LOP_EXECUTE_INLINE_RANGE_load_arg4: + add ip, r9, #4 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<- vBase[0] +0: + ldr r9, .LOP_EXECUTE_INLINE_RANGE_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. 
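In the C interpreters that follow, the non-range form's fifth argument comes out of the opcode word itself: the argument count rides in the top nibble (read by INST_B) and vG in bits 11-8 (read by INST_A), which is why the new case 5 reads GET_REGISTER(INST_A(inst)). The mterp accessors behave like this (an equivalent sketch, not the verbatim macros):

    #include <cstdint>

    // Opcode code unit for execute-inline: count and vG nibbles, then opcode byte.
    static inline unsigned instA(uint16_t inst) { return (inst >> 8) & 0x0f; } // vG
    static inline unsigned instB(uint16_t inst) { return inst >> 12; }         // #args

One detail worth noting: the expanded range path traces with ALOGE, which logs unconditionally at error severity, whereas the surrounding interpreter code uses the verbose-only ILOGV for the same trace line.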
diff --git a/vm/mterp/out/InterpC-allstubs.cpp b/vm/mterp/out/InterpC-allstubs.cpp index 1ef878370..5258cbcee 100644 --- a/vm/mterp/out/InterpC-allstubs.cpp +++ b/vm/mterp/out/InterpC-allstubs.cpp @@ -2810,6 +2810,60 @@ OP_END /* File: c/OP_EXECUTE_INLINE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4; + arg0 = arg1 = arg2 = arg3 = arg4 = 0; + + EXPORT_PC(); + + vsrc1 = INST_B(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* 0-4 register indices */ + ILOGV("|execute-inline args=%d @%d {regs=0x%04x}", + vsrc1, ref, vdst); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 5); + + switch (vsrc1) { + case 5: + arg4 = GET_REGISTER(INST_A(inst)); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst >> 12); + /* fall through */ + case 3: + arg2 = GET_REGISTER((vdst & 0x0f00) >> 8); + /* fall through */ + case 2: + arg1 = GET_REGISTER((vdst & 0x00f0) >> 4); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst & 0x0f); + /* fall through */ + default: // case 0 + ; + } + + if( vsrc1 == 5 ) { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp5Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp5Std(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } + } else { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp4Dbg(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } + } + +#else //ifdef INLINE_ARG_EXPANDED /* * This has the same form as other method calls, but we ignore * the 5th argument (vA). 
This is chiefly because the first four @@ -2864,6 +2918,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END @@ -2871,6 +2926,57 @@ OP_END /* File: c/OP_EXECUTE_INLINE_RANGE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4, arg5, arg6; + arg0 = arg1 = arg2 = arg3 = arg4 = arg5 = 0; /* placate gcc */ + arg6 = 0; + + + EXPORT_PC(); + + vsrc1 = INST_AA(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* range base */ + ALOGE("|execute-inline-range args=%d @%d {regs=v%d-v%d}", + vsrc1, ref, vdst, vdst+vsrc1-1); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 7); + + switch (vsrc1) { + case 7: + arg6 = GET_REGISTER(vdst+6); + /* fall through */ + case 6: + arg5 = GET_REGISTER(vdst+5); + /* fall through */ + case 5: + arg4 = GET_REGISTER(vdst+4); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst+3); + /* fall through */ + case 3: + arg2 = GET_REGISTER(vdst+2); + /* fall through */ + case 2: + arg1 = GET_REGISTER(vdst+1); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst+0); + /* fall through */ + default: // case 0 + ; + } + + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp7Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp7Std(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } +#else //ifdef INLINE_ARG_EXPANDED u4 arg0, arg1, arg2, arg3; arg0 = arg1 = arg2 = arg3 = 0; /* placate gcc */ @@ -2909,6 +3015,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/mterp/out/InterpC-portable.cpp b/vm/mterp/out/InterpC-portable.cpp index 0328aa883..ee02aa1e8 100644 --- a/vm/mterp/out/InterpC-portable.cpp +++ b/vm/mterp/out/InterpC-portable.cpp @@ -2821,6 +2821,60 @@ OP_END /* File: c/OP_EXECUTE_INLINE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4; + arg0 = arg1 = arg2 = arg3 = arg4 = 0; + + EXPORT_PC(); + + vsrc1 = INST_B(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* 0-4 register indices */ + ILOGV("|execute-inline args=%d @%d {regs=0x%04x}", + vsrc1, ref, vdst); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 5); + + switch (vsrc1) { + case 5: + arg4 = GET_REGISTER(INST_A(inst)); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst >> 12); + /* fall through */ + case 3: + arg2 = GET_REGISTER((vdst & 0x0f00) >> 8); + /* fall through */ + case 2: + arg1 = GET_REGISTER((vdst & 0x00f0) >> 4); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst & 0x0f); + /* fall through */ + default: // case 0 + ; + } + + if( vsrc1 == 5 ) { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp5Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp5Std(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } + } else { + if 
(self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp4Dbg(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } + } + +#else //ifdef INLINE_ARG_EXPANDED /* * This has the same form as other method calls, but we ignore * the 5th argument (vA). This is chiefly because the first four @@ -2875,6 +2929,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END @@ -2882,6 +2937,57 @@ OP_END /* File: c/OP_EXECUTE_INLINE_RANGE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4, arg5, arg6; + arg0 = arg1 = arg2 = arg3 = arg4 = arg5 = 0; /* placate gcc */ + arg6 = 0; + + + EXPORT_PC(); + + vsrc1 = INST_AA(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* range base */ + ALOGE("|execute-inline-range args=%d @%d {regs=v%d-v%d}", + vsrc1, ref, vdst, vdst+vsrc1-1); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 7); + + switch (vsrc1) { + case 7: + arg6 = GET_REGISTER(vdst+6); + /* fall through */ + case 6: + arg5 = GET_REGISTER(vdst+5); + /* fall through */ + case 5: + arg4 = GET_REGISTER(vdst+4); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst+3); + /* fall through */ + case 3: + arg2 = GET_REGISTER(vdst+2); + /* fall through */ + case 2: + arg1 = GET_REGISTER(vdst+1); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst+0); + /* fall through */ + default: // case 0 + ; + } + + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp7Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp7Std(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } +#else //ifdef INLINE_ARG_EXPANDED u4 arg0, arg1, arg2, arg3; arg0 = arg1 = arg2 = arg3 = 0; /* placate gcc */ @@ -2920,6 +3026,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/mterp/out/InterpC-x86.cpp b/vm/mterp/out/InterpC-x86.cpp index 77dc8885c..eb8a1e9e4 100644 --- a/vm/mterp/out/InterpC-x86.cpp +++ b/vm/mterp/out/InterpC-x86.cpp @@ -1181,6 +1181,57 @@ OP_END /* File: c/OP_EXECUTE_INLINE_RANGE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4, arg5, arg6; + arg0 = arg1 = arg2 = arg3 = arg4 = arg5 = 0; /* placate gcc */ + arg6 = 0; + + + EXPORT_PC(); + + vsrc1 = INST_AA(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* range base */ + ALOGE("|execute-inline-range args=%d @%d {regs=v%d-v%d}", + vsrc1, ref, vdst, vdst+vsrc1-1); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 7); + + switch (vsrc1) { + case 7: + arg6 = GET_REGISTER(vdst+6); + /* fall through */ + case 6: + arg5 = GET_REGISTER(vdst+5); + /* fall through */ + case 5: + arg4 = GET_REGISTER(vdst+4); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst+3); + /* fall through */ + case 3: + arg2 = 
GET_REGISTER(vdst+2); + /* fall through */ + case 2: + arg1 = GET_REGISTER(vdst+1); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst+0); + /* fall through */ + default: // case 0 + ; + } + + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp7Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp7Std(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } +#else //ifdef INLINE_ARG_EXPANDED u4 arg0, arg1, arg2, arg3; arg0 = arg1 = arg2 = arg3 = 0; /* placate gcc */ @@ -1219,6 +1270,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/oo/Class.cpp b/vm/oo/Class.cpp index 2a23a9f9a..a5369fb35 100644 --- a/vm/oo/Class.cpp +++ b/vm/oo/Class.cpp @@ -1845,16 +1845,23 @@ static ClassObject* loadClassFromDex0(DvmDex* pDvmDex, } if (pHeader->instanceFieldsSize != 0) { - int count = (int) pHeader->instanceFieldsSize; - u4 lastIndex = 0; - DexField field; - - newClass->ifieldCount = count; - newClass->ifields = (InstField*) dvmLinearAlloc(classLoader, - count * sizeof(InstField)); - for (i = 0; i < count; i++) { - dexReadClassDataField(&pEncodedData, &field, &lastIndex); - loadIFieldFromDex(newClass, &field, &newClass->ifields[i]); + OptClassMap* optClass = getOptClassHandler(newClass); + + if(optClass != NULL){ + optClass->handleIfield(newClass, classLoader, pHeader, &pEncodedData); + }else{ + int count = (int) pHeader->instanceFieldsSize; + u4 lastIndex = 0; + DexField field; + + newClass->ifieldCount = count; + newClass->ifields = (InstField*) dvmLinearAlloc(classLoader, + count * sizeof(InstField)); + + for (i = 0; i < count; i++) { + dexReadClassDataField(&pEncodedData, &field, &lastIndex); + loadIFieldFromDex(newClass, &field, &newClass->ifields[i]); + } } dvmLinearReadOnly(classLoader, newClass->ifields); } @@ -3914,6 +3921,9 @@ static void initSFields(ClassObject* clazz) } } +void dvmInitSFields(ClassObject* clazz){ + return initSFields(clazz); +} /* * Determine whether "descriptor" yields the same class object in the @@ -4920,3 +4930,8 @@ int dvmCompareNameDescriptorAndMethod(const char* name, return dvmCompareDescriptorAndMethodProto(descriptor, method); } + +__attribute__((weak)) OptClassMap* getOptClassHandler(ClassObject* newClass){ + return NULL; +} + diff --git a/vm/oo/Class.h b/vm/oo/Class.h index 349c66692..ca76f6623 100644 --- a/vm/oo/Class.h +++ b/vm/oo/Class.h @@ -19,6 +19,8 @@ #ifndef DALVIK_OO_CLASS_H_ #define DALVIK_OO_CLASS_H_ +#include "libdex/DexClass.h" + /* * The classpath and bootclasspath differ in that only the latter is * consulted when looking for classes needed by the VM. When searching @@ -281,4 +283,11 @@ int dvmCompareNameDescriptorAndMethod(const char* name, */ size_t dvmClassObjectSize(const ClassObject *clazz); +typedef struct OptClassMap{ + const char* descriptor; + void (*handleIfield) (ClassObject* newClass, Object* classLoader, const DexClassDataHeader* pHeader, const u1** pData); +} OptClassMap; + +OptClassMap* getOptClassHandler(ClassObject* newClass); + #endif // DALVIK_OO_CLASS_H_ |
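The weak getOptClassHandler stub above, like the weak dump_dalvik and genCmpLongThumb2 stubs earlier in this change, is a link-time hook: the VM carries a do-nothing default, and a separately linked vendor object that supplies a strong definition silently takes over instance-field layout for the class descriptors it recognizes. A hypothetical override, purely to illustrate the shape (the descriptor, handler, and include path are invented; the parameter types come from the Dalvik headers):

    #include <cstring>
    #include "oo/Class.h"    // OptClassMap and the getOptClassHandler declaration

    // Invented vendor handler: the real one would consume the encoded fields
    // from *pData and populate newClass->ifields with its own layout.
    static void handleFooIfields(ClassObject* newClass, Object* classLoader,
                                 const DexClassDataHeader* pHeader,
                                 const u1** pData) {
        /* vendor-specific instance-field layout */
    }

    static OptClassMap fooHook = { "Lcom/example/Foo;", handleFooIfields };

    // Strong definition: overrides the __attribute__((weak)) default in
    // vm/oo/Class.cpp when this object file is linked in.
    OptClassMap* getOptClassHandler(ClassObject* newClass) {
        if (strcmp(newClass->descriptor, fooHook.descriptor) == 0)
            return &fooHook;
        return NULL;
    }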