author | Steve Kondik <shade@chemlab.org> | 2013-11-11 00:32:52 -0800
committer | Steve Kondik <shade@chemlab.org> | 2013-11-11 00:32:52 -0800
commit | bab417cc2aceee45238d5648975118bf3dd4c2e9 (patch)
tree | 39f1867dee9fe25cf7174917ef39ea3dd361fca4 /vm
parent | 5531b23c1546fdf896db25f7412291bada6e723c (diff)
parent | e17852495a15ddad079305c725d067ac95e4d655 (diff)
Merge branch 'kk_2.7_rb1.9' of git://codeaurora.org/platform/dalvik into caf
Change-Id: I885fab2470352d0a625c9946d0d5c9111486b713
Diffstat (limited to 'vm')
50 files changed, 2460 insertions, 189 deletions
diff --git a/vm/Android.mk b/vm/Android.mk index e5d5448ce..8cca3f39b 100644 --- a/vm/Android.mk +++ b/vm/Android.mk @@ -39,6 +39,16 @@ else endif host_smp_flag := -DANDROID_SMP=1 +ifeq ($(ARCH_ARM_HAVE_ARMV7A),true) + target_inline_arg5_flag := -DINLINE_ARG_EXPANDED + host_inline_arg5_flag := -DINLINE_ARG_EXPANDED +else + target_inline_arg5_flag := + host_inline_arg5_flag := +endif + + + # Build the installed version (libdvm.so) first WITH_JIT := true include $(LOCAL_PATH)/ReconfigureDvm.mk @@ -55,9 +65,9 @@ ifneq ($(strip $(WITH_ADDRESS_SANITIZER)),) LOCAL_CFLAGS := $(filter-out $(CLANG_CONFIG_UNKNOWN_CFLAGS),$(LOCAL_CFLAGS)) endif +LOCAL_CFLAGS += $(target_inline_arg5_flag) # TODO: split out the asflags. LOCAL_ASFLAGS := $(LOCAL_CFLAGS) - include $(BUILD_SHARED_LIBRARY) # Derivation #1 @@ -65,6 +75,7 @@ include $(BUILD_SHARED_LIBRARY) include $(LOCAL_PATH)/ReconfigureDvm.mk LOCAL_CFLAGS += -UNDEBUG -DDEBUG=1 -DLOG_NDEBUG=1 -DWITH_DALVIK_ASSERT \ -DWITH_JIT_TUNING $(target_smp_flag) +LOCAL_CFLAGS += $(target_inline_arg5_flag) # TODO: split out the asflags. LOCAL_ASFLAGS := $(LOCAL_CFLAGS) LOCAL_MODULE := libdvm_assert @@ -77,6 +88,7 @@ ifneq ($(dvm_arch),mips) # MIPS support for self-verification is incomplete include $(LOCAL_PATH)/ReconfigureDvm.mk LOCAL_CFLAGS += -UNDEBUG -DDEBUG=1 -DLOG_NDEBUG=1 -DWITH_DALVIK_ASSERT \ -DWITH_SELF_VERIFICATION $(target_smp_flag) + LOCAL_CFLAGS += $(target_inline_arg5_flag) # TODO: split out the asflags. LOCAL_ASFLAGS := $(LOCAL_CFLAGS) LOCAL_MODULE := libdvm_sv @@ -135,6 +147,7 @@ ifeq ($(WITH_HOST_DALVIK),true) endif LOCAL_CFLAGS += $(host_smp_flag) + LOCAL_CFLAGS += $(host_inline_arg5_flag) # TODO: split out the asflags. LOCAL_ASFLAGS := $(LOCAL_CFLAGS) LOCAL_MODULE_TAGS := optional diff --git a/vm/DalvikCrashDump.cpp b/vm/DalvikCrashDump.cpp new file mode 100644 index 000000000..c940d3ebf --- /dev/null +++ b/vm/DalvikCrashDump.cpp @@ -0,0 +1,31 @@ +/** + * Copyright (c) 2012, The Linux Foundation. All rights reserved. + * Not a Contribution, Apache license notifications and license are retained + * for attribution purposes only. + * + * Copyright (c) 2005-2008, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * Support files for dump Dalvik info during crash from debuggerd + **/ + +#include <Dalvik.h> +#include "DalvikCrashDump.h" + +/* Add hook to dump dalvik information */ +__attribute__ ((weak)) +void dump_dalvik(ptrace_context_t* context, log_t* log, pid_t tid, bool at_fault) +{ + ALOGE("[Dalvik] No information available \n"); +} diff --git a/vm/DalvikCrashDump.h b/vm/DalvikCrashDump.h new file mode 100644 index 000000000..633179448 --- /dev/null +++ b/vm/DalvikCrashDump.h @@ -0,0 +1,54 @@ +/** + * Copyright (c) 2012, The Linux Foundation. All rights reserved. 
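Aside on the DalvikCrashDump.cpp hunk above: the patch gives libdvm a default `dump_dalvik` marked `__attribute__((weak))`, so debuggerd always has something to call, while a vendor library can link a strong definition that transparently replaces the no-op. A minimal, self-contained sketch of that linkage pattern (the function name and signature below are illustrative stand-ins, not the real debuggerd interface):

```cpp
// weak_hook.cpp -- illustrative only; not the real ptrace_context_t/log_t types.
#include <cstdio>

// Weak default: the linker keeps this definition only if no strong
// definition of the same symbol exists anywhere in the final image.
extern "C" __attribute__((weak)) void dump_dalvik_info(int tid) {
    std::printf("[Dalvik] No information available for tid %d\n", tid);
}

int main() {
    // Calls the weak no-op here; a vendor library that defines a strong
    // dump_dalvik_info(int) would be called instead, with no code change.
    dump_dalvik_info(1234);
    return 0;
}
```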
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met, + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Hooks for debuggerd to call into libdvm during a crash to dump + * dalvik related information + **/ + +#ifndef DALVIK_CRASH_DUMP_H +#define DALVIK_CRASH_DUMP_H + +#include <corkscrew/ptrace.h> + +#ifdef HAS_LIBDVM +# include <utility.h> +#else +# include <../system/core/debuggerd/utility.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +void dump_dalvik (ptrace_context_t* context, log_t* log, pid_t tid, bool at_fault); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/vm/DalvikVersion.h b/vm/DalvikVersion.h index e71c8393c..da67439c4 100644 --- a/vm/DalvikVersion.h +++ b/vm/DalvikVersion.h @@ -32,6 +32,6 @@ * way classes load changes, e.g. field ordering or vtable layout. Changing * this guarantees that the optimized form of the DEX file is regenerated. */ -#define DALVIK_VM_BUILD 27 +#define DALVIK_VM_BUILD 28 /* Increment for device extension */ #endif // DALVIK_VERSION_H_ @@ -239,6 +239,7 @@ ifeq ($(dvm_arch),arm) #LOCAL_CFLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=vfp LOCAL_CFLAGS += -Werror MTERP_ARCH_KNOWN := true + # Select architecture-specific sources (armv5te, armv7-a, etc.) 
LOCAL_SRC_FILES += \ arch/arm/CallOldABI.S \ @@ -248,6 +249,9 @@ ifeq ($(dvm_arch),arm) mterp/out/InterpAsm-$(dvm_arch_variant).S ifeq ($(WITH_JIT),true) + # Debuggerd support + LOCAL_SRC_FILES += DalvikCrashDump.cpp + LOCAL_SRC_FILES += \ compiler/codegen/RallocUtil.cpp \ compiler/codegen/arm/$(dvm_arch_variant)/Codegen.cpp \ @@ -259,6 +263,12 @@ ifeq ($(dvm_arch),arm) compiler/codegen/arm/ArmRallocUtil.cpp \ compiler/template/out/CompilerTemplateAsm-$(dvm_arch_variant).S endif + + ifeq ($(WITH_QC_PERF),true) + LOCAL_WHOLE_STATIC_LIBRARIES += libqc-dalvik + LOCAL_SHARED_LIBRARIES += libqc-opt + LOCAL_CFLAGS += -DWITH_QC_PERF + endif endif ifeq ($(dvm_arch),mips) diff --git a/vm/Globals.h b/vm/Globals.h index 29f7356ad..bfd2b7e12 100644 --- a/vm/Globals.h +++ b/vm/Globals.h @@ -312,6 +312,7 @@ struct DvmGlobals { ClassObject* exNoSuchFieldException; ClassObject* exNoSuchMethodError; ClassObject* exNullPointerException; + ClassObject* exNumberFormatException; ClassObject* exOutOfMemoryError; ClassObject* exRuntimeException; ClassObject* exStackOverflowError; diff --git a/vm/Init.cpp b/vm/Init.cpp index f1762c96c..1a99e12ab 100644 --- a/vm/Init.cpp +++ b/vm/Init.cpp @@ -1374,6 +1374,13 @@ private: }; /* + * Hook for post-init functions + */ +__attribute__((weak)) void dvmPostInitZygote(void) { + ; +} + +/* * VM initialization. Pass in any options provided on the command line. * Do not pass in the class name or the options for the class. * @@ -1572,6 +1579,7 @@ std::string dvmStartup(int argc, const char* const argv[], if (!initZygote()) { return "initZygote failed"; } + dvmPostInitZygote(); } else { if (!dvmInitAfterZygote()) { return "dvmInitAfterZygote failed"; diff --git a/vm/InitRefs.cpp b/vm/InitRefs.cpp index 08c28f856..06f99e7a9 100644 --- a/vm/InitRefs.cpp +++ b/vm/InitRefs.cpp @@ -104,6 +104,7 @@ static bool initClassReferences() { { &gDvm.exNoSuchFieldException, "Ljava/lang/NoSuchFieldException;" }, { &gDvm.exNoSuchMethodError, "Ljava/lang/NoSuchMethodError;" }, { &gDvm.exNullPointerException, "Ljava/lang/NullPointerException;" }, + { &gDvm.exNumberFormatException, "Ljava/lang/NumberFormatException;" }, { &gDvm.exOutOfMemoryError, "Ljava/lang/OutOfMemoryError;" }, { &gDvm.exRuntimeException, "Ljava/lang/RuntimeException;" }, { &gDvm.exStackOverflowError, "Ljava/lang/StackOverflowError;" }, @@ -477,6 +478,10 @@ static bool verifyStringOffsets() { return ok; } +__attribute__((weak)) bool verifyExtra(){ + return true; +} + /* (documented in header) */ bool dvmFindRequiredClassesAndMembers() { /* @@ -491,7 +496,8 @@ bool dvmFindRequiredClassesAndMembers() { && initDirectMethodReferences() && initVirtualMethodOffsets() && initFinalizerReference() - && verifyStringOffsets(); + && verifyStringOffsets() + && verifyExtra(); } /* (documented in header) */ diff --git a/vm/InlineNative.cpp b/vm/InlineNative.cpp index 00c1e9554..97f4d3964 100644 --- a/vm/InlineNative.cpp +++ b/vm/InlineNative.cpp @@ -913,3 +913,40 @@ bool dvmPerformInlineOp4Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, TRACE_METHOD_EXIT(self, method); return result; } + +#ifdef INLINE_ARG_EXPANDED +bool dvmPerformInlineOp5Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4) +{ + Method* method = dvmResolveInlineNative(opIndex); + if (method == NULL) { + return ((InlineOp5Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, + pResult, arg4); + } + + Thread* self = dvmThreadSelf(); + TRACE_METHOD_ENTER(self, method); + bool result = ((InlineOp5Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, 
arg1, arg2, arg3, + pResult, arg4); + TRACE_METHOD_EXIT(self, method); + return result; +} + +bool dvmPerformInlineOp7Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4, u4 arg5, u4 arg6) +{ + Method* method = dvmResolveInlineNative(opIndex); + if (method == NULL) { + return ((InlineOp7Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, + pResult, arg4, arg5, arg6); + } + + Thread* self = dvmThreadSelf(); + TRACE_METHOD_ENTER(self, method); + bool result = ((InlineOp7Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, + pResult, arg4, arg5, arg6); + TRACE_METHOD_EXIT(self, method); + return result; +} +#endif + diff --git a/vm/InlineNative.h b/vm/InlineNative.h index fe14f8bf8..5d3dd13ec 100644 --- a/vm/InlineNative.h +++ b/vm/InlineNative.h @@ -33,6 +33,12 @@ Method* dvmFindInlinableMethod(const char* classDescriptor, typedef bool (*InlineOp4Func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult); +#ifdef INLINE_ARG_EXPANDED +typedef bool (*InlineOp5Func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, u4 arg4); +typedef bool (*InlineOp7Func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, u4 arg4, u4 arg5, u4 arg6); +#endif /* * Table of inline operations. * @@ -117,12 +123,45 @@ INLINE bool dvmPerformInlineOp4Std(u4 arg0, u4 arg1, u4 arg2, u4 arg3, return (*gDvmInlineOpsTable[opIndex].func)(arg0, arg1, arg2, arg3, pResult); } +#ifdef INLINE_ARG_EXPANDED +/* + * Perform the operation specified by "opIndex". + * + * We want the arguments to appear in the first 4 registers so they can + * be passed straight through to the handler function. Ideally on ARM + * they'll go into r0-r3 and stay there. + * + * Returns "true" if everything went normally, "false" if an exception + * was thrown. + */ +INLINE bool dvmPerformInlineOp5Std(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4) +{ + return ((InlineOp5Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, pResult, arg4); +} + +INLINE bool dvmPerformInlineOp7Std(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4, u4 arg5, u4 arg6) +{ + return ((InlineOp7Func)(*gDvmInlineOpsTable[opIndex].func))(arg0, arg1, arg2, arg3, pResult, arg4, arg5, arg6); +} + +#endif /* * Like the "std" version, but will emit profiling info. */ bool dvmPerformInlineOp4Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult, int opIndex); +#ifdef INLINE_ARG_EXPANDED +/* + * Like the "std" version, but will emit profiling info. + */ +bool dvmPerformInlineOp5Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4); +bool dvmPerformInlineOp7Dbg(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult, int opIndex, u4 arg4, u4 arg5, u4 arg6); +#endif /* * Return method & populate the table on first use. */ @@ -38,6 +38,8 @@ #define LW_HASH_STATE_MASK 0x3 #define LW_HASH_STATE_SHIFT 1 #define LW_HASH_STATE(x) (((x) >> LW_HASH_STATE_SHIFT) & LW_HASH_STATE_MASK) +#define LW_HASH_STATE_SIZE 2 +#define LW_HASH_STATE_ABS_MASK 0x6 /* * Monitor accessor. 
Extracts a monitor structure pointer from a fat diff --git a/vm/alloc/Heap.cpp b/vm/alloc/Heap.cpp index c11ff79dd..b108f90a2 100644 --- a/vm/alloc/Heap.cpp +++ b/vm/alloc/Heap.cpp @@ -29,16 +29,17 @@ #include "alloc/HeapSource.h" #include "alloc/MarkSweep.h" #include "os/os.h" - #include <sys/mman.h> +#include "hprof/Hprof.h" #include <sys/resource.h> #include <sys/time.h> #include <limits.h> #include <errno.h> - #include <cutils/trace.h> -#ifdef __BIONIC__ -#include <cutils/properties.h> +#include <cutils/process_name.h> + +#ifdef HAVE_ANDROID_OS +#include "cutils/properties.h" static int debugalloc() { @@ -196,8 +197,13 @@ static void gcForMalloc(bool clearSoftReferences) */ static void *tryMalloc(size_t size) { +#ifdef HAVE_ANDROID_OS + char prop_value[PROPERTY_VALUE_MAX] = {'\0'}; +#endif + char* hprof_file = NULL; void *ptr; - + int result = -1; + int debug_oom = 0; //TODO: figure out better heuristics // There will be a lot of churn if someone allocates a bunch of // big objects in a row, and we hit the frag case each time. @@ -212,7 +218,6 @@ static void *tryMalloc(size_t size) if (ptr != NULL) { return ptr; } - /* * The allocation failed. If the GC is running, block until it * completes and retry. @@ -252,7 +257,6 @@ static void *tryMalloc(size_t size) FRACTIONAL_MB(newHeapSize), size); return ptr; } - /* Most allocations should have succeeded by now, so the heap * is really full, really fragmented, or the requested size is * really big. Do another GC, collecting SoftReferences this @@ -273,6 +277,46 @@ static void *tryMalloc(size_t size) //TODO: tell the HeapSource to dump its state dvmDumpThread(dvmThreadSelf(), false); +#ifdef HAVE_ANDROID_OS + /* Read the property to check whether hprof should be generated or not */ + property_get("dalvik.debug.oom",prop_value,"0"); + debug_oom = atoi(prop_value); +#endif + if(debug_oom == 1) { + LOGE_HEAP("Generating hprof for process: %s PID: %d", + get_process_name(),getpid()); + dvmUnlockHeap(); + + /* allocate memory for hprof file name. Allocate approx 30 bytes. + * 11 byte for directory path, 10 bytes for pid, 6 bytes for + * extension + "\0'. + */ + hprof_file = (char*) malloc (sizeof(char) * 30); + + /* creation of hprof will fail if /data/misc permission is not set + * to 0777. + */ + + if(hprof_file) { + snprintf(hprof_file,30,"/data/misc/%d.hprof",getpid()); + LOGE_HEAP("Generating hprof in file: %s",hprof_file ); + + result = hprofDumpHeap(hprof_file, -1, false); + free(hprof_file); + } else { + LOGE_HEAP("Failed to allocate memory for file name." + "Generating hprof in default file: /data/misc/app_oom.hprof"); + result = hprofDumpHeap("/data/misc/app_oom.hprof", -1, false); + } + dvmLockMutex(&gDvm.gcHeapLock); + + if (result != 0) { + /* ideally we'd throw something more specific based on actual failure */ + dvmThrowRuntimeException( + "Failure during heap dump; check log output for details"); + LOGE_HEAP(" hprofDumpHeap failed with result: %d ",result); + } + } return NULL; } diff --git a/vm/analysis/Optimize.cpp b/vm/analysis/Optimize.cpp index b61b82c18..3e955cbbb 100644 --- a/vm/analysis/Optimize.cpp +++ b/vm/analysis/Optimize.cpp @@ -50,7 +50,6 @@ static bool rewriteExecuteInlineRange(Method* method, u2* insns, static void rewriteReturnVoid(Method* method, u2* insns); static bool needsReturnBarrier(Method* method); - /* * Create a table of inline substitutions. Sets gDvm.inlineSubs. 
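The Heap.cpp hunk above makes `tryMalloc()` emit an hprof snapshot on OOM when the `dalvik.debug.oom` system property is set to 1, naming the file after the failing process's pid. A compressed sketch of the gating and file-naming logic; `property_get` and `hprofDumpHeap` are the real Android APIs, and the stub below is an assumption added only so the sketch runs standalone:

```cpp
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <unistd.h>

// Stand-in for libcutils property_get(); here it just returns the default.
static int fake_property_get(const char* key, char* value, const char* def) {
    (void)key;
    std::strcpy(value, def);
    return (int)std::strlen(value);
}

static void maybeDumpHprofOnOom() {
    char prop[32] = {'\0'};
    fake_property_get("dalvik.debug.oom", prop, "0");
    if (std::atoi(prop) != 1)
        return;  // feature disabled: fall through to the normal OOM path

    // "/data/misc/" (11) + pid digits (<=10) + ".hprof" + NUL fits in 30,
    // matching the patch's 30-byte allocation.
    char path[30];
    std::snprintf(path, sizeof(path), "/data/misc/%d.hprof", (int)getpid());
    std::printf("would dump hprof to %s\n", path);  // real code: hprofDumpHeap(path, -1, false)
}

int main() { maybeDumpHprofOnOom(); return 0; }
```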
* @@ -968,6 +967,8 @@ static bool rewriteInvokeObjectInit(Method* method, u2* insns) LOGVV("DexOpt: replaced Object.<init> in %s.%s", method->clazz->descriptor, method->name); + }else{ + return false; } return true; diff --git a/vm/compiler/CompilerIR.h b/vm/compiler/CompilerIR.h index 73efad834..a97ccb10f 100644 --- a/vm/compiler/CompilerIR.h +++ b/vm/compiler/CompilerIR.h @@ -76,6 +76,7 @@ typedef struct ChainCellCounts { u1 count[kChainingCellLast]; /* include one more space for the gap # */ u4 dummyForAlignment; } u; + u4 extraSize; } ChainCellCounts; typedef struct LIR { @@ -175,6 +176,8 @@ typedef struct BasicBlock { BlockListType blockListType; // switch and exception handling GrowableList blocks; } successorBlockList; + + LIR *blockLabelLIR; } BasicBlock; /* @@ -213,6 +216,7 @@ typedef struct CompilationUnit { int numClassPointers; LIR *chainCellOffsetLIR; GrowableList pcReconstructionList; + GrowableList pcReconstructionListExtended; int headerSize; // bytes before the first code ptr int dataOffset; // starting offset of literal pool int totalSize; // header + code size @@ -228,9 +232,11 @@ typedef struct CompilationUnit { bool heapMemOp; // Mark mem ops for self verification bool usesLinkRegister; // For self-verification only int profileCodeSize; // Size of the profile prefix in bytes + GrowableList chainingListByType[kChainingCellGap]; int numChainingCells[kChainingCellGap]; LIR *firstChainingLIR[kChainingCellGap]; LIR *chainingCellBottom; + int chainingCellExtraSize; struct RegisterPool *regPool; int optRound; // round number to tell an LIR's age jmp_buf *bailPtr; @@ -248,6 +254,7 @@ typedef struct CompilationUnit { /* Data structure for loop analysis and optimizations */ struct LoopAnalysis *loopAnalysis; + bool hasHoistedChecks; /* Map SSA names to location */ RegLocation *regLocation; @@ -278,6 +285,12 @@ typedef struct CompilationUnit { bool printSSANames; void *blockLabelList; bool quitLoopMode; // cold path/complex bytecode + void *labelList; + bool setCCode; // gen instruction that sets ccodes + // the flag must be set before calling + // codegen function and reset upon completion + + void *extraData; // placeholder } CompilationUnit; #if defined(WITH_SELF_VERIFICATION) diff --git a/vm/compiler/Dataflow.cpp b/vm/compiler/Dataflow.cpp index 7bed8396a..e60931489 100644 --- a/vm/compiler/Dataflow.cpp +++ b/vm/compiler/Dataflow.cpp @@ -814,6 +814,22 @@ int dvmConvertSSARegToDalvik(const CompilationUnit *cUnit, int ssaReg) } /* + * Utility function to populate attributes based on the DEX opcode + */ +__attribute__((weak)) int dvmGetDexOptAttributes(const DecodedInstruction* instr) +{ + int result = 0; + if (instr) { + Opcode opcode = instr->opcode; + if ((opcode >= OP_NOP) && (opcode < (Opcode)kMirOpLast)) { + result = dvmCompilerDataFlowAttributes[opcode]; + } + } + + return result; +} + +/* * Utility function to convert encoded SSA register value into Dalvik register * and subscript pair. Each SSA register can be used to index the * ssaToDalvikMap list to get the subscript[31..16]/dalvik_reg[15..0] mapping. 
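The weak `dvmGetDexOptAttributes()` introduced above replaces raw `dvmCompilerDataFlowAttributes[opcode]` indexing throughout the compiler with a range-checked accessor, so extended opcodes added by a vendor backend cannot index past the base table. The shape of that guard, with toy names standing in for the real `Opcode`/`kMirOpLast` enums:

```cpp
// Toy enum/table; the real code uses Opcode, kMirOpLast and
// dvmCompilerDataFlowAttributes[].
enum ToyOpcode { TOY_NOP = 0, TOY_MOVE, kToyLastKnown };

static const int kDataFlowAttrs[kToyLastKnown] = { 0x0, 0x1 };

__attribute__((weak)) int getDexOptAttributes(int opcode) {
    // In-range opcodes use the base table; anything else (e.g. a vendor
    // extension opcode) safely reports "no attributes" unless a strong
    // override of this weak function knows better.
    if (opcode >= TOY_NOP && opcode < kToyLastKnown)
        return kDataFlowAttrs[opcode];
    return 0;
}

int main() { return getDexOptAttributes(TOY_MOVE); }
```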
@@ -823,9 +839,12 @@ char *dvmCompilerGetDalvikDisassembly(const DecodedInstruction *insn, { char buffer[256]; Opcode opcode = insn->opcode; - int dfAttributes = dvmCompilerDataFlowAttributes[opcode]; + int dfAttributes = 0; int flags; char *ret; + if (insn) { + dfAttributes = dvmGetDexOptAttributes(insn); + } buffer[0] = 0; if ((int)opcode >= (int)kMirOpFirst) { @@ -925,11 +944,14 @@ char *dvmCompilerFullDisassembler(const CompilationUnit *cUnit, char operand0[256], operand1[256]; const DecodedInstruction *insn = &mir->dalvikInsn; int opcode = insn->opcode; - int dfAttributes = dvmCompilerDataFlowAttributes[opcode]; + int dfAttributes = 0; char *ret; int length; OpcodeFlags flags; + if (insn) { + dvmGetDexOptAttributes(insn); + } buffer[0] = 0; if (opcode >= kMirOpFirst) { if (opcode == kMirOpPhi) { @@ -1121,7 +1143,7 @@ bool dvmCompilerFindLocalLiveIn(CompilationUnit *cUnit, BasicBlock *bb) for (mir = bb->firstMIRInsn; mir; mir = mir->next) { int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); DecodedInstruction *dInsn = &mir->dalvikInsn; if (dfAttributes & DF_HAS_USES) { @@ -1211,6 +1233,16 @@ static void dataFlowSSAFormat3RC(CompilationUnit *cUnit, MIR *mir) } } +/* check for invoke instructions in the loop trace */ +__attribute__((weak)) void dvmCompilerCheckMIR(CompilationUnit *cUnit, MIR *mir) +{ + int flags = dexGetFlagsFromOpcode(mir->dalvikInsn.opcode); + + if (flags & kInstrInvoke) { + cUnit->hasInvoke = true; + } +} + /* Entry function to convert a block into SSA representation */ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) { @@ -1219,11 +1251,14 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) if (bb->dataFlowInfo == NULL) return false; for (mir = bb->firstMIRInsn; mir; mir = mir->next) { + + dvmCompilerCheckMIR(cUnit, mir); + mir->ssaRep = (struct SSARepresentation *) dvmCompilerNew(sizeof(SSARepresentation), true); int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); int numUses = 0; @@ -1261,6 +1296,8 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) false); mir->ssaRep->fpUse = (bool *)dvmCompilerNew(sizeof(bool) * numUses, false); + mir->ssaRep->wideUse = (bool *)dvmCompilerNew(sizeof(bool) * numUses, + false); } int numDefs = 0; @@ -1278,6 +1315,8 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) false); mir->ssaRep->fpDef = (bool *)dvmCompilerNew(sizeof(bool) * numDefs, false); + mir->ssaRep->wideDef = (bool *)dvmCompilerNew(sizeof(bool) * numDefs, + false); } DecodedInstruction *dInsn = &mir->dalvikInsn; @@ -1286,37 +1325,48 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) numUses = 0; if (dfAttributes & DF_UA) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_A; + mir->ssaRep->wideUse[numUses] = false; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vA, numUses++); } else if (dfAttributes & DF_UA_WIDE) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_A; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vA, numUses++); mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_A; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vA+1, numUses++); } if (dfAttributes & DF_UB) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_B; + mir->ssaRep->wideUse[numUses] = false; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vB, numUses++); } else if 
(dfAttributes & DF_UB_WIDE) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_B; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vB, numUses++); mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_B; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vB+1, numUses++); } if (dfAttributes & DF_UC) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_C; + mir->ssaRep->wideUse[numUses] = false; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC, numUses++); } else if (dfAttributes & DF_UC_WIDE) { mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_C; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC, numUses++); mir->ssaRep->fpUse[numUses] = dfAttributes & DF_FP_C; + mir->ssaRep->wideUse[numUses] = true; handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC+1, numUses++); } } if (dfAttributes & DF_HAS_DEFS) { mir->ssaRep->fpDef[0] = dfAttributes & DF_FP_A; + mir->ssaRep->wideDef[0] = dfAttributes & DF_DA_WIDE; handleSSADef(cUnit, mir->ssaRep->defs, dInsn->vA, 0); if (dfAttributes & DF_DA_WIDE) { mir->ssaRep->fpDef[1] = dfAttributes & DF_FP_A; + mir->ssaRep->wideDef[1] = true; handleSSADef(cUnit, mir->ssaRep->defs, dInsn->vA+1, 1); } } @@ -1339,7 +1389,7 @@ bool dvmCompilerDoSSAConversion(CompilationUnit *cUnit, BasicBlock *bb) /* Setup a constant value for opcodes thare have the DF_SETS_CONST attribute */ static void setConstant(CompilationUnit *cUnit, int ssaReg, int value) { - dvmSetBit(cUnit->isConstantV, ssaReg); + dvmCompilerSetBit(cUnit->isConstantV, ssaReg); cUnit->constantValues[ssaReg] = value; } @@ -1350,7 +1400,7 @@ bool dvmCompilerDoConstantPropagation(CompilationUnit *cUnit, BasicBlock *bb) for (mir = bb->firstMIRInsn; mir; mir = mir->next) { int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); DecodedInstruction *dInsn = &mir->dalvikInsn; @@ -1437,7 +1487,7 @@ bool dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit, /* Find basic induction variable first */ for (mir = bb->firstMIRInsn; mir; mir = mir->next) { int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); if (!(dfAttributes & DF_IS_LINEAR)) continue; @@ -1481,7 +1531,7 @@ bool dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit, break; } if (deltaIsConstant) { - dvmSetBit(isIndVarV, mir->ssaRep->uses[0]); + dvmCompilerSetBit(isIndVarV, mir->ssaRep->uses[0]); InductionVariableInfo *ivInfo = (InductionVariableInfo *) dvmCompilerNew(sizeof(InductionVariableInfo), false); @@ -1502,7 +1552,7 @@ bool dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit, /* Find dependent induction variable now */ for (mir = bb->firstMIRInsn; mir; mir = mir->next) { int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); if (!(dfAttributes & DF_IS_LINEAR)) continue; @@ -1554,7 +1604,7 @@ bool dvmCompilerFindInductionVariables(struct CompilationUnit *cUnit, if (cIsConstant) { unsigned int i; - dvmSetBit(isIndVarV, mir->ssaRep->defs[0]); + dvmCompilerSetBit(isIndVarV, mir->ssaRep->defs[0]); InductionVariableInfo *ivInfo = (InductionVariableInfo *) dvmCompilerNew(sizeof(InductionVariableInfo), false); diff --git a/vm/compiler/Dataflow.h b/vm/compiler/Dataflow.h index f04c91ca8..67b14c559 100644 --- a/vm/compiler/Dataflow.h +++ b/vm/compiler/Dataflow.h @@ -97,9 +97,11 @@ typedef struct SSARepresentation { int numUses; 
int *uses; bool *fpUse; + bool *wideUse; int numDefs; int *defs; bool *fpDef; + bool *wideDef; } SSARepresentation; /* @@ -125,4 +127,6 @@ typedef struct ArrayAccessInfo { #define DECODE_REG(v) (v & 0xffff) #define DECODE_SUB(v) (((unsigned int) v) >> 16) +extern int dvmGetDexOptAttributes(const DecodedInstruction* instr); + #endif // DALVIK_VM_DATAFLOW_H_ diff --git a/vm/compiler/Frontend.cpp b/vm/compiler/Frontend.cpp index 47c1898a0..65ef1ebd9 100644 --- a/vm/compiler/Frontend.cpp +++ b/vm/compiler/Frontend.cpp @@ -219,11 +219,11 @@ static int analyzeInlineTarget(DecodedInstruction *dalvikInsn, int attributes, } if (!(flags & kInstrCanReturn)) { - if (!(dvmCompilerDataFlowAttributes[dalvikOpcode] & + if (!(dvmGetDexOptAttributes(dalvikInsn) & DF_IS_GETTER)) { attributes &= ~METHOD_IS_GETTER; } - if (!(dvmCompilerDataFlowAttributes[dalvikOpcode] & + if (!(dvmGetDexOptAttributes(dalvikInsn) & DF_IS_SETTER)) { attributes &= ~METHOD_IS_SETTER; } @@ -1355,6 +1355,41 @@ bool dvmCompileMethod(const Method *method, JitTranslationInfo *info) return false; } +/* + * Utility funtion to check the DEX opcode for correctness + */ +__attribute__((weak)) bool dvmVerifyDex(CompilationUnit *cUnit, BasicBlock *curBlock, + const u2* codePtr, MIR *insn) +{ + bool result = false; + if (insn) { + if ((insn->dalvikInsn.opcode >= OP_NOP) && + (insn->dalvikInsn.opcode < OP_UNUSED_FF)) { + result = true; + } + } + return result; +} + +/* dump simple trace property */ +__attribute__((weak)) void dvmDumpLoopTraceStats(CompilationUnit *cUnit) +{ + if(cUnit->printMe){ + ALOGV("hasInvoke %d",cUnit->hasInvoke); + } +} + +/* dump reglocation info of a loop trace */ +__attribute__((weak)) void dvmCompilerDumpRegLocationInfo(CompilationUnit *cUnit) +{ + if(cUnit->printMe){ + int i; + for (i=0; i< cUnit->numSSARegs; i++) { + ALOGV("LOC %d:%d",i,cUnit->regLocation[i].sRegLow); + } + } +} + /* Extending the trace by crawling the code from curBlock */ static bool exhaustTrace(CompilationUnit *cUnit, BasicBlock *curBlock) { @@ -1392,6 +1427,7 @@ static bool exhaustTrace(CompilationUnit *cUnit, BasicBlock *curBlock) if (width == 0) break; + dvmVerifyDex(cUnit, curBlock, codePtr + width, insn); dvmCompilerAppendMIR(curBlock, insn); codePtr += width; @@ -1464,11 +1500,15 @@ static bool compileLoop(CompilationUnit *cUnit, unsigned int startOffset, cUnit->jitMode = kJitLoop; + /* reset number of insns in the trace */ + cUnit->numInsts=0; + /* Initialize the block list */ dvmInitGrowableList(&cUnit->blockList, 4); /* Initialize the PC reconstruction list */ dvmInitGrowableList(&cUnit->pcReconstructionList, 8); + dvmInitGrowableList(&cUnit->pcReconstructionListExtended, 1); /* Create the default entry and exit blocks and enter them to the list */ BasicBlock *entryBlock = dvmCompilerNewBB(kEntryBlock, numBlocks++); @@ -1530,6 +1570,8 @@ static bool compileLoop(CompilationUnit *cUnit, unsigned int startOffset, if (!dvmCompilerBuildLoop(cUnit)) goto bail; + dvmDumpLoopTraceStats(cUnit); + dvmCompilerLoopOpt(cUnit); /* @@ -1547,6 +1589,8 @@ static bool compileLoop(CompilationUnit *cUnit, unsigned int startOffset, /* Allocate Registers using simple local allocation scheme */ dvmCompilerLocalRegAlloc(cUnit); + dvmCompilerDumpRegLocationInfo(cUnit); + /* Convert MIR to LIR, etc. 
*/ dvmCompilerMIR2LIR(cUnit); #endif @@ -1693,6 +1737,7 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts, /* Initialize the PC reconstruction list */ dvmInitGrowableList(&cUnit.pcReconstructionList, 8); + dvmInitGrowableList(&cUnit.pcReconstructionListExtended, 1); /* Initialize the basic block list */ blockList = &cUnit.blockList; dvmInitGrowableList(blockList, 8); @@ -1800,6 +1845,7 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts, curBB = dvmCompilerNewBB(kEntryBlock, numBlocks++); dvmInsertGrowableList(blockList, (intptr_t) curBB); curBB->startOffset = curOffset; + cUnit.entryBlock = curBB; entryCodeBB = dvmCompilerNewBB(kDalvikByteCode, numBlocks++); dvmInsertGrowableList(blockList, (intptr_t) entryCodeBB); @@ -1826,9 +1872,12 @@ bool dvmCompileTrace(JitTraceDescription *desc, int numMaxInsts, /* The trace should never incude instruction data */ assert(width); insn->width = width; + + dvmVerifyDex(&cUnit, curBB, codePtr + width, insn); traceSize += width; dvmCompilerAppendMIR(curBB, insn); - cUnit.numInsts++; + /* assign seqNum to each insn in the trace */ + insn->seqNum = cUnit.numInsts++; int flags = dexGetFlagsFromOpcode(insn->dalvikInsn.opcode); diff --git a/vm/compiler/InlineTransformation.cpp b/vm/compiler/InlineTransformation.cpp index 650340c5e..24cf052ff 100644 --- a/vm/compiler/InlineTransformation.cpp +++ b/vm/compiler/InlineTransformation.cpp @@ -69,7 +69,7 @@ static bool inlineGetter(CompilationUnit *cUnit, return false; } - int dfFlags = dvmCompilerDataFlowAttributes[getterInsn.opcode]; + int dfFlags = dvmGetDexOptAttributes(&getterInsn); /* Expecting vA to be the destination register */ if (dfFlags & (DF_UA | DF_UA_WIDE)) { @@ -156,7 +156,7 @@ static bool inlineSetter(CompilationUnit *cUnit, if (!dvmCompilerCanIncludeThisInstruction(calleeMethod, &setterInsn)) return false; - int dfFlags = dvmCompilerDataFlowAttributes[setterInsn.opcode]; + int dfFlags = dvmGetDexOptAttributes(&setterInsn); if (dfFlags & (DF_UA | DF_UA_WIDE)) { setterInsn.vA = convertRegId(&invokeMIR->dalvikInsn, calleeMethod, @@ -222,12 +222,26 @@ static bool inlineSetter(CompilationUnit *cUnit, return true; } +/* skip inlining certain method */ +__attribute__((weak)) bool dvmSkipInlineThisMethod(CompilationUnit *cUnit, + const Method *calleeMethod, + MIR *invokeMIR, + BasicBlock *invokeBB, + bool isPredicted, + bool isRange) +{ + return false; +} + static bool tryInlineSingletonCallsite(CompilationUnit *cUnit, const Method *calleeMethod, MIR *invokeMIR, BasicBlock *invokeBB, bool isRange) { + if (dvmSkipInlineThisMethod(cUnit, calleeMethod, invokeMIR, invokeBB, false, isRange)) + return true; + /* Not a Java method */ if (dvmIsNativeMethod(calleeMethod)) return false; @@ -276,6 +290,9 @@ static bool tryInlineVirtualCallsite(CompilationUnit *cUnit, BasicBlock *invokeBB, bool isRange) { + if (dvmSkipInlineThisMethod(cUnit, calleeMethod, invokeMIR, invokeBB, true, isRange)) + return true; + /* Not a Java method */ if (dvmIsNativeMethod(calleeMethod)) return false; diff --git a/vm/compiler/IntermediateRep.cpp b/vm/compiler/IntermediateRep.cpp index db68c3c85..94e102ee9 100644 --- a/vm/compiler/IntermediateRep.cpp +++ b/vm/compiler/IntermediateRep.cpp @@ -25,6 +25,7 @@ BasicBlock *dvmCompilerNewBB(BBType blockType, int blockId) bb->id = blockId; bb->predecessors = dvmCompilerAllocBitVector(blockId > 32 ? 
blockId : 32, true /* expandable */); + bb->blockLabelLIR=NULL; return bb; } @@ -117,6 +118,7 @@ void dvmCompilerInsertLIRBefore(LIR *currentLIR, LIR *newLIR) */ void dvmCompilerInsertLIRAfter(LIR *currentLIR, LIR *newLIR) { + assert(currentLIR->next != NULL); newLIR->prev = currentLIR; newLIR->next = currentLIR->next; currentLIR->next = newLIR; diff --git a/vm/compiler/Loop.cpp b/vm/compiler/Loop.cpp index dc04a1135..c830432ad 100644 --- a/vm/compiler/Loop.cpp +++ b/vm/compiler/Loop.cpp @@ -339,6 +339,15 @@ static void updateRangeCheckInfo(CompilationUnit *cUnit, int arrayReg, } } +__attribute__((weak)) void dvmCompilerDumpMIRInCodeMotion(CompilationUnit *cUnit, MIR *mir) +{ + if(cUnit->printMe){ + DecodedInstruction *decInsn = &(mir->dalvikInsn); + char *decodedString = dvmCompilerGetDalvikDisassembly(decInsn, NULL); + ALOGD("%#06x %s", decInsn->opcode, decodedString); + } +} + /* Returns true if the loop body cannot throw any exceptions */ static bool doLoopBodyCodeMotion(CompilationUnit *cUnit) { @@ -349,7 +358,7 @@ static bool doLoopBodyCodeMotion(CompilationUnit *cUnit) for (mir = loopBody->firstMIRInsn; mir; mir = mir->next) { DecodedInstruction *dInsn = &mir->dalvikInsn; int dfAttributes = - dvmCompilerDataFlowAttributes[mir->dalvikInsn.opcode]; + dvmGetDexOptAttributes(&mir->dalvikInsn); /* Skip extended MIR instructions */ if ((u2) dInsn->opcode >= kNumPackedOpcodes) continue; @@ -420,6 +429,7 @@ static bool doLoopBodyCodeMotion(CompilationUnit *cUnit) updateRangeCheckInfo(cUnit, mir->ssaRep->uses[refIdx], mir->ssaRep->uses[useIdx]); } + dvmCompilerDumpMIRInCodeMotion(cUnit, mir); } } @@ -508,6 +518,7 @@ static void genHoistedChecks(CompilationUnit *cUnit) dvmCompilerAbort(cUnit); } } + cUnit->hasHoistedChecks=true; } } @@ -667,6 +678,62 @@ bool dvmCompilerFilterLoopBlocks(CompilationUnit *cUnit) return true; } +__attribute__((weak)) void dvmCompilerDumpIVList(CompilationUnit *cUnit) +{ + unsigned int i; + GrowableList *ivList = cUnit->loopAnalysis->ivList; + + if(cUnit->printMe){ + for (i = 0; i < ivList->numUsed; i++) { + InductionVariableInfo *ivInfo = + (InductionVariableInfo *) ivList->elemList[i]; + int iv = dvmConvertSSARegToDalvik(cUnit, ivInfo->ssaReg); + /* Basic IV */ + if (ivInfo->ssaReg == ivInfo->basicSSAReg) { + ALOGD("BIV %d: s%d(v%d_%d) + %d", i, + ivInfo->ssaReg, + DECODE_REG(iv), DECODE_SUB(iv), + ivInfo->inc); + /* Dependent IV */ + } else { + int biv = dvmConvertSSARegToDalvik(cUnit, ivInfo->basicSSAReg); + + ALOGD("DIV %d: s%d(v%d_%d) = %d * s%d(v%d_%d) + %d", i, + ivInfo->ssaReg, + DECODE_REG(iv), DECODE_SUB(iv), + ivInfo->m, + ivInfo->basicSSAReg, + DECODE_REG(biv), DECODE_SUB(biv), + ivInfo->c); + } + } + } +} + +__attribute__((weak)) void dvmCompilerDumpHoistedChecks(CompilationUnit *cUnit) +{ + LoopAnalysis *loopAnalysis = cUnit->loopAnalysis; + unsigned int i; + if(cUnit->printMe){ + for (i = 0; i < loopAnalysis->arrayAccessInfo->numUsed; i++) { + ArrayAccessInfo *arrayAccessInfo = + GET_ELEM_N(loopAnalysis->arrayAccessInfo, + ArrayAccessInfo*, i); + int arrayReg = DECODE_REG( + dvmConvertSSARegToDalvik(cUnit, arrayAccessInfo->arrayReg)); + int idxReg = DECODE_REG( + dvmConvertSSARegToDalvik(cUnit, arrayAccessInfo->ivReg)); + ALOGD("Array access %d", i); + ALOGD(" arrayReg %d", arrayReg); + ALOGD(" idxReg %d", idxReg); + ALOGD(" endReg %d", loopAnalysis->endConditionReg); + ALOGD(" maxC %d", arrayAccessInfo->maxC); + ALOGD(" minC %d", arrayAccessInfo->minC); + ALOGD(" opcode %d", loopAnalysis->loopBranchOpcode); + } + } +} + /* * Main entry point to do 
loop optimization. * Return false if sanity checks for loop formation/optimization failed. @@ -699,6 +766,8 @@ bool dvmCompilerLoopOpt(CompilationUnit *cUnit) false /* isIterative */); DEBUG_LOOP(dumpIVList(cUnit);) + dvmCompilerDumpIVList(cUnit); + /* Only optimize array accesses for simple counted loop for now */ if (!isSimpleCountedLoop(cUnit)) return false; @@ -714,6 +783,7 @@ bool dvmCompilerLoopOpt(CompilationUnit *cUnit) * header. */ genHoistedChecks(cUnit); + dvmCompilerDumpHoistedChecks(cUnit); return true; } diff --git a/vm/compiler/SSATransformation.cpp b/vm/compiler/SSATransformation.cpp index 7dde59411..dc4e25c38 100644 --- a/vm/compiler/SSATransformation.cpp +++ b/vm/compiler/SSATransformation.cpp @@ -153,7 +153,7 @@ static void checkForDominanceFrontier(BasicBlock *domBB, if (succBB->iDom != domBB && succBB->blockType == kDalvikByteCode && succBB->hidden == false) { - dvmSetBit(domBB->domFrontier, succBB->id); + dvmCompilerSetBit(domBB->domFrontier, succBB->id); } } @@ -253,7 +253,7 @@ static bool computeBlockDominators(CompilationUnit *cUnit, BasicBlock *bb) /* tempBlockV = tempBlockV ^ dominators */ dvmIntersectBitVectors(tempBlockV, tempBlockV, predBB->dominators); } - dvmSetBit(tempBlockV, bb->id); + dvmCompilerSetBit(tempBlockV, bb->id); if (dvmCompareBitVectors(tempBlockV, bb->dominators)) { dvmCopyBitVector(bb->dominators, tempBlockV); return true; @@ -317,7 +317,7 @@ static void computeDominators(CompilationUnit *cUnit) /* Set the dominator for the root node */ dvmClearAllBits(cUnit->entryBlock->dominators); - dvmSetBit(cUnit->entryBlock->dominators, cUnit->entryBlock->id); + dvmCompilerSetBit(cUnit->entryBlock->dominators, cUnit->entryBlock->id); if (cUnit->tempBlockV == NULL) { cUnit->tempBlockV = dvmCompilerAllocBitVector(numTotalBlocks, @@ -526,7 +526,7 @@ static bool insertPhiNodeOperands(CompilationUnit *cUnit, BasicBlock *bb) int encodedSSAValue = predBB->dataFlowInfo->dalvikToSSAMap[dalvikReg]; int ssaReg = DECODE_REG(encodedSSAValue); - dvmSetBit(ssaRegV, ssaReg); + dvmCompilerSetBit(ssaRegV, ssaReg); } /* Count the number of SSA registers for a Dalvik register */ @@ -589,6 +589,13 @@ void dvmCompilerMethodSSATransformation(CompilationUnit *cUnit) false /* isIterative */); } +/* brief report of DFS order of trace blocks */ +__attribute__((weak)) void dumpDFSOrder(CompilationUnit *cUnit) +{ + ALOGV("DFS order complete"); + return; +} + /* Build a loop. Return true if a loop structure is successfully identified. 
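The Loop.cpp additions above all follow one convention: each new diagnostic dump (dvmCompilerDumpMIRInCodeMotion, dvmCompilerDumpIVList, dvmCompilerDumpHoistedChecks) is a weak function that bails immediately unless `cUnit->printMe` is set, keeping release traces quiet while letting an instrumented build replace the whole routine. Reduced to its skeleton, with a simplified stand-in struct:

```cpp
#include <cstdio>

struct ToyCompilationUnit { bool printMe; };  // simplified stand-in

// Weak + flag-gated: essentially free in release runs, and replaceable
// wholesale by a diagnostics build via a strong definition.
__attribute__((weak)) void dumpLoopStats(const ToyCompilationUnit* cUnit) {
    if (!cUnit->printMe)
        return;  // fast exit on the common path
    std::printf("IV / hoisted-check diagnostics would print here\n");
}

int main() {
    ToyCompilationUnit cu = { true };
    dumpLoopStats(&cu);
    return 0;
}
```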
*/ bool dvmCompilerBuildLoop(CompilationUnit *cUnit) { @@ -605,6 +612,8 @@ bool dvmCompilerBuildLoop(CompilationUnit *cUnit) /* Re-compute the DFS order just for the loop */ computeDFSOrder(cUnit); + dumpDFSOrder(cUnit); + /* Re-compute the dominator info just for the loop */ computeDominators(cUnit); diff --git a/vm/compiler/codegen/CodegenFactory.cpp b/vm/compiler/codegen/CodegenFactory.cpp index f42ae746b..2bf27f5ab 100644 --- a/vm/compiler/codegen/CodegenFactory.cpp +++ b/vm/compiler/codegen/CodegenFactory.cpp @@ -133,9 +133,18 @@ static RegLocation loadValue(CompilationUnit *cUnit, RegLocation rlSrc, return rlSrc; } +__attribute__((weak)) bool storeValueThumb2(CompilationUnit *cUnit, RegLocation rlDest, + RegLocation rlSrc) +{ + return false; +} + static void storeValue(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc) { + if(storeValueThumb2(cUnit, rlDest, rlSrc)) + return; + LIR *defStart; LIR *defEnd; assert(!rlDest.wide); @@ -204,9 +213,18 @@ static RegLocation loadValueWide(CompilationUnit *cUnit, RegLocation rlSrc, return rlSrc; } +__attribute__((weak)) bool storeValueWideThumb2(CompilationUnit *cUnit, RegLocation rlDest, + RegLocation rlSrc) +{ + return false; +} + static void storeValueWide(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc) { + if(storeValueWideThumb2(cUnit, rlDest, rlSrc)) + return; + LIR *defStart; LIR *defEnd; assert(FPREG(rlSrc.lowReg)==FPREG(rlSrc.highReg)); diff --git a/vm/compiler/codegen/Optimizer.h b/vm/compiler/codegen/Optimizer.h index 36f33e226..914064d6b 100644 --- a/vm/compiler/codegen/Optimizer.h +++ b/vm/compiler/codegen/Optimizer.h @@ -30,7 +30,9 @@ enum optControlVector { kSuppressLoads, kMethodInlining, kMethodJit, +#ifndef WITH_QC_PERF kShiftArithmetic, +#endif }; /* Forward declarations */ diff --git a/vm/compiler/codegen/PostOptimizer.h b/vm/compiler/codegen/PostOptimizer.h new file mode 100644 index 000000000..006102ac3 --- /dev/null +++ b/vm/compiler/codegen/PostOptimizer.h @@ -0,0 +1,267 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT + * ARE DISCLAIMED. 
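The CodegenFactory.cpp hunk above (`storeValueThumb2`/`storeValueWideThumb2`) uses a third variant of the weak-hook idiom: the hook returns a bool meaning "I handled it", and the generic store path runs only while the weak default, which returns false, is still in place. A minimal sketch with invented names:

```cpp
#include <cstdio>

// Weak default signals "not handled", so the generic path below runs.
__attribute__((weak)) bool storeValueFast(int reg) {
    (void)reg;
    return false;
}

void storeValue(int reg) {
    if (storeValueFast(reg))
        return;  // a strong override claimed the store and did its own codegen
    std::printf("generic store of v%d\n", reg);  // fallback path
}

int main() { storeValue(3); return 0; }
```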
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DALVIK_VM_COMPILER_POSTOPTIMIZATION_H_ +#define DALVIK_VM_COMPILER_POSTOPTIMIZATION_H_ + +#include "Dalvik.h" +#include "libdex/DexOpcodes.h" +#include "compiler/codegen/arm/ArmLIR.h" + +//#include "compiler/codegen/Ralloc.h" + +//#include "compiler/codegen/arm/ArmLIR.h" +/* +#include "InlineNative.h" +#include "vm/Globals.h" +#include "vm/compiler/Loop.h" +#include "vm/compiler/Compiler.h" +#include "vm/compiler/CompilerInternals.h" +#include "vm/compiler/codegen/arm/ArmLIR.h" +#include "libdex/OpCodeNames.h" +#include "vm/compiler/codegen/arm/CalloutHelper.h" +#include "vm/compiler/codegen/arm/Ralloc.h" +*/ + + +/* Forward declarations */ +struct CompilationUnit; +struct LIR; +struct MIR; +//struct RegLocation; +struct ArmLIR; + +/* +enum OpCode; +enum RegisterClass; +////enum ArmOpCode; +enum ArmOpcode; +enum ArmConditionCode; +enum TemplateOpCode; +*/ + +void dvmCompilerApplyLocalOptimizations(struct CompilationUnit *cUnit, + struct LIR *head, + struct LIR *tail); + +void dvmCompilerApplyGlobalOptimizations(struct CompilationUnit *cUnit); + +bool dvmArithLocalOptimization(struct CompilationUnit *cUnit, + struct MIR *mir, + struct RegLocation rlDest, + struct RegLocation rlSrc1, + struct RegLocation rlSrc2); + +typedef struct LocalOptsFuncMap{ + + bool (*handleEasyDivide) (struct CompilationUnit *cUnit, + enum Opcode dalvikOpCode, + struct RegLocation rlSrc, + struct RegLocation rlDest, + int lit); + bool (*handleEasyMultiply) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + struct RegLocation rlDest, + int lit); + bool (*handleExecuteInline) (struct CompilationUnit *cUnit, + struct MIR *mir); + void (*handleExtendedMIR) (struct CompilationUnit *cUnit, + struct MIR *mir); + void (*insertChainingSwitch) (struct CompilationUnit *cUnit); + bool (*isPopCountLE2) (unsigned int x); + bool (*isPowerOfTwo) (int x); + int (*lowestSetBit) (unsigned int x); + void (*markCard) (struct CompilationUnit *cUnit, + int valReg, + int tgtAddrReg); + void (*setupLoopEntryBlock) (struct CompilationUnit *cUnit, + struct BasicBlock *entry, + struct ArmLIR *bodyLabel); + void (*genInterpSingleStep) (struct CompilationUnit *cUnit, + struct MIR *mir); + void (*setMemRefType) (struct ArmLIR *lir, + bool isLoad, + int memType); + void (*annotateDalvikRegAccess) (struct ArmLIR *lir, + int regId, + bool isLoad); + void (*setupResourceMasks) (struct ArmLIR *lir); + struct ArmLIR *(*newLIR0) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode); + struct ArmLIR *(*newLIR1) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode, + int dest); + struct ArmLIR *(*newLIR2) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode, + int dest, + int src1); + struct ArmLIR *(*newLIR3) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode, + int dest, + int src1, + int src2); +#if defined(_ARMV7_A) || defined(_ARMV7_A_NEON) + struct ArmLIR *(*newLIR4) (struct CompilationUnit *cUnit, + enum ArmOpcode opCode, + int dest, + int src1, + int src2, + int info); 
+#endif + struct RegLocation (*inlinedTarget) (struct CompilationUnit *cUnit, + struct MIR *mir, + bool fpHint); + struct ArmLIR *(*genCheckCommon) (struct CompilationUnit *cUnit, + int dOffset, + struct ArmLIR *branch, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*loadWordDisp) (struct CompilationUnit *cUnit, + int rBase, + int displacement, + int rDest); + struct ArmLIR *(*storeWordDisp) (struct CompilationUnit *cUnit, + int rBase, + int displacement, + int rSrc); + void (*loadValueDirect) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + int reg1); + void (*loadValueDirectFixed) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + int reg1); + void (*loadValueDirectWide) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + int regLo, + int regHi); + void (*loadValueDirectWideFixed) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + int regLo, + int regHi); + struct RegLocation (*loadValue) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + enum RegisterClass opKind); + void (*storeValue) (struct CompilationUnit *cUnit, + struct RegLocation rlDest, + struct RegLocation rlSrc); + struct RegLocation (*loadValueWide) (struct CompilationUnit *cUnit, + struct RegLocation rlSrc, + enum RegisterClass opKind); + struct ArmLIR *(*genNullCheck) (struct CompilationUnit *cUnit, + int sReg, + int mReg, + int dOffset, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*genRegRegCheck) (struct CompilationUnit *cUnit, + enum ArmConditionCode cond, + int reg1, + int reg2, + int dOffset, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*genZeroCheck) (struct CompilationUnit *cUnit, + int mReg, + int dOffset, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*genBoundsCheck) (struct CompilationUnit *cUnit, + int rIndex, + int rBound, + int dOffset, + struct ArmLIR *pcrLabel); + struct ArmLIR *(*loadConstantNoClobber) (struct CompilationUnit *cUnit, + int rDest, + int value); + struct ArmLIR *(*loadConstant) (struct CompilationUnit *cUnit, + int rDest, + int value); + void (*storeValueWide) (struct CompilationUnit *cUnit, + struct RegLocation rlDest, + struct RegLocation rlSrc); + void (*genSuspendPoll) (struct CompilationUnit *cUnit, struct MIR *mir); + struct ArmLIR *(*storeBaseDispWide)(struct CompilationUnit *cUnit, + int rBase, + int displacement, + int rSrcLo, + int rSrcHi); + struct ArmLIR *(*storeBaseDisp)(struct CompilationUnit *cUnit, + int rBase, + int displacement, + int rSrc, + OpSize size); + struct ArmLIR *(*loadBaseDispWide)(struct CompilationUnit *cUnit, + MIR *mir, + int rBase, + int displacement, + int rDestLo, + int rDestHi, + int sReg); + struct ArmLIR *(*opRegRegImm)(struct CompilationUnit *cUnit, + enum OpKind op, + int rDest, + int rSrc1, + int value); + struct ArmLIR *(*opRegRegReg)(struct CompilationUnit *cUnit, + enum OpKind op, + int rDest, + int rSrc1, + int rSrc2); + struct ArmLIR *(*loadBaseIndexed)(struct CompilationUnit *cUnit, + int rBase, + int rIndex, + int rDest, + int scale, + enum OpSize size); + struct ArmLIR *(*storeBaseIndexed)(struct CompilationUnit *cUnit, + int rBase, + int rIndex, + int rSrc, + int scale, + enum OpSize size); + enum RegisterClass (*dvmCompilerRegClassBySize)(enum OpSize size); + int (*encodeShift)(int code, int amount); + struct ArmLIR *(*opRegReg)(struct CompilationUnit *cUnit, + enum OpKind op, + int rDestSrc1, + int rSrc2); + struct ArmLIR *(*opCondBranch)(struct CompilationUnit *cUnit, + enum ArmConditionCode cc); + struct ArmLIR *(*genIT)(struct CompilationUnit *cUnit, + enum ArmConditionCode code, + 
const char *guide); + void (*genBarrier)(struct CompilationUnit *cUnit); + int (*modifiedImmediate)(u4 value); + struct ArmLIR *(*genRegImmCheck)(struct CompilationUnit *cUnit, + enum ArmConditionCode cond, + int reg, + int checkValue, + int dOffset, + ArmLIR *pcrLabel); +} LocalOptsFuncMap; + +extern LocalOptsFuncMap localOptsFunMap; + +#endif // DALVIK_VM_COMPILER_POSTOPTIMIZATION_H_ + diff --git a/vm/compiler/codegen/arm/ArchFactory.cpp b/vm/compiler/codegen/arm/ArchFactory.cpp index 2daa7bcba..6be40b2d2 100644 --- a/vm/compiler/codegen/arm/ArchFactory.cpp +++ b/vm/compiler/codegen/arm/ArchFactory.cpp @@ -61,7 +61,7 @@ static TGT_LIR *genNullCheck(CompilationUnit *cUnit, int sReg, int mReg, if (dvmIsBitSet(cUnit->regPool->nullCheckedRegs, sReg)) { return pcrLabel; } - dvmSetBit(cUnit->regPool->nullCheckedRegs, sReg); + dvmCompilerSetBit(cUnit->regPool->nullCheckedRegs, sReg); return genRegImmCheck(cUnit, kArmCondEq, mReg, 0, dOffset, pcrLabel); } diff --git a/vm/compiler/codegen/arm/ArchUtility.cpp b/vm/compiler/codegen/arm/ArchUtility.cpp index 9f87b7ff4..db1281ff1 100644 --- a/vm/compiler/codegen/arm/ArchUtility.cpp +++ b/vm/compiler/codegen/arm/ArchUtility.cpp @@ -144,11 +144,14 @@ static void buildInsnString(const char *fmt, ArmLIR *lir, char* buf, operand = expandImmediate(operand); sprintf(tbuf,"%d [%#x]", operand, operand); break; + case 'q': + sprintf(tbuf,"q%d",(operand - 128 - FP_REG_OFFSET) >> 2); + break; case 's': sprintf(tbuf,"s%d",operand & FP_REG_MASK); break; case 'S': - sprintf(tbuf,"d%d",(operand & FP_REG_MASK) >> 1); + sprintf(tbuf,"d%d",(operand - FP_DOUBLE - FP_REG_OFFSET) >> 1); break; case 'h': sprintf(tbuf,"%04x", operand); @@ -189,6 +192,15 @@ static void buildInsnString(const char *fmt, ArmLIR *lir, char* buf, case kArmCondMi: strcpy(tbuf, "mi"); break; + case kArmCondPl: + strcpy(tbuf, "pl"); + break; + case kArmCondHi: + strcpy(tbuf, "hi"); + break; + case kArmCondLs: + strcpy(tbuf, "ls"); + break; default: strcpy(tbuf, ""); break; @@ -291,12 +303,26 @@ void dvmDumpResourceMask(LIR *lir, u8 mask, const char *prefix) #define DUMP_RESOURCE_MASK(X) #define DUMP_SSA_REP(X) +/* + * Decodes generic ARM opcodes + */ +static void printDefaultInstr(ArmLIR *lir, unsigned char *baseAddr) +{ + char buf[256]; + char opName[256]; + int offset = lir->generic.offset; + + buildInsnString(getEncoding(lir->opcode)->name, lir, opName, baseAddr, 256); + buildInsnString(getEncoding(lir->opcode)->fmt, lir, buf, baseAddr, 256); + ALOGD("%p (%04x): %-12s%s%s", + baseAddr + offset, offset, opName, buf, + lir->flags.isNop ? 
"(nop)" : ""); +} + /* Pretty-print a LIR instruction */ void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) { ArmLIR *lir = (ArmLIR *) arg; - char buf[256]; - char opName[256]; int offset = lir->generic.offset; int dest = lir->operands[0]; const bool dumpNop = false; @@ -358,6 +384,10 @@ void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) ALOGD("-------- reconstruct dalvik PC : 0x%04x @ +0x%04x", dest, lir->operands[1]); break; + case kArmPseudoPCReconstructionCellExtended: + ALOGD("-------- reconstruct dalvik PC : 0x%04x @ +0x%04x (extended)\n", dest, + lir->operands[1]); + break; case kArmPseudoPCReconstructionBlockLabel: /* Do nothing */ break; @@ -372,13 +402,7 @@ void dvmDumpLIRInsn(LIR *arg, unsigned char *baseAddr) if (lir->flags.isNop && !dumpNop) { break; } - buildInsnString(EncodingMap[lir->opcode].name, lir, opName, - baseAddr, 256); - buildInsnString(EncodingMap[lir->opcode].fmt, lir, buf, baseAddr, - 256); - ALOGD("%p (%04x): %-8s%s%s", - baseAddr + offset, offset, opName, buf, - lir->flags.isNop ? "(nop)" : ""); + printDefaultInstr(lir, baseAddr); break; } diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h index e159aecdb..49d5b2725 100644 --- a/vm/compiler/codegen/arm/ArmLIR.h +++ b/vm/compiler/codegen/arm/ArmLIR.h @@ -316,6 +316,7 @@ typedef enum ArmConditionCode { * Assemble.c. */ typedef enum ArmOpcode { + kArmPseudoPCReconstructionCellExtended = -19, /* pcReconstruction for extended MIR*/ kArmChainingCellBottom = -18, kArmPseudoBarrier = -17, kArmPseudoExtended = -16, @@ -536,7 +537,7 @@ typedef enum ArmOpcode { [0000] rm[3..0] */ kThumb2MulRRR, /* mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0] */ - kThumb2MnvRR, /* mvn [11101010011011110] rd[11-8] [0000] + kThumb2MvnRR, /* mvn [11101010011011110] rd[11-8] [0000] rm[3..0] */ kThumb2RsubRRI8, /* rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0] */ @@ -627,8 +628,10 @@ typedef enum ArmOpcode { kThumb2Dmb, /* dmb [1111001110111111100011110101] option[3-0] */ kThumb2LdrPcReln12, /* ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12] imm12[11-0] */ +#ifndef WITH_QC_PERF kThumb2RsbRRR, /* rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0] */ +#endif kThumbUndefined, /* undefined [11011110xxxxxxxx] */ kArmLast, } ArmOpcode; @@ -670,6 +673,8 @@ typedef enum ArmOpFeatureFlags { kUsesCCodes, kMemLoad, kMemStore, + kSetsFPStatus, + kUsesFPStatus, } ArmOpFeatureFlags; #define IS_LOAD (1 << kMemLoad) @@ -697,6 +702,8 @@ typedef enum ArmOpFeatureFlags { #define IS_IT (1 << kIsIT) #define SETS_CCODES (1 << kSetsCCodes) #define USES_CCODES (1 << kUsesCCodes) +#define SETS_FPSTATUS (1 << kSetsFPStatus) +#define USES_FPSTATUS (1 << kUsesFPStatus) /* Common combo register usage patterns */ #define REG_USE01 (REG_USE0 | REG_USE1) @@ -747,6 +754,7 @@ typedef enum ArmTargetOptHints { } ArmTargetOptHints; extern ArmEncodingMap EncodingMap[kArmLast]; +extern ArmEncodingMap* getEncoding(ArmOpcode opcode); /* * Each instance of this struct holds a pseudo or real LIR instruction: @@ -776,6 +784,7 @@ typedef struct ArmLIR { int aliasInfo; // For Dalvik register & litpool disambiguation u8 useMask; // Resource mask for use u8 defMask; // Resource mask for def + u4* extraData; } ArmLIR; /* Init values when a predicted chain is initially assembled */ diff --git a/vm/compiler/codegen/arm/Assemble.cpp b/vm/compiler/codegen/arm/Assemble.cpp index 10572eb5f..5e64c0107 100644 --- a/vm/compiler/codegen/arm/Assemble.cpp +++ b/vm/compiler/codegen/arm/Assemble.cpp @@ -654,7 +654,7 @@ 
ArmEncodingMap EncodingMap[kArmLast] = { ENCODING_MAP(kThumb2AdcRRR, 0xeb500000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, - IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, + IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES | USES_CCODES, "adcs", "r!0d, r!1d, r!2d!3H", 2), ENCODING_MAP(kThumb2AndRRR, 0xea000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, @@ -677,7 +677,7 @@ ArmEncodingMap EncodingMap[kArmLast] = { kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, "mul", "r!0d, r!1d, r!2d", 2), - ENCODING_MAP(kThumb2MnvRR, 0xea6f0000, + ENCODING_MAP(kThumb2MvnRR, 0xea6f0000, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, "mvn", "r!0d, r!1d, shift !2d", 2), @@ -685,12 +685,12 @@ ArmEncodingMap EncodingMap[kArmLast] = { kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "rsb", "r!0d,r!1d,#!2m", 2), - ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsub */ + "rsbs", "r!0d,r!1d,#!2m", 2), + ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsbs */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "neg", "r!0d,r!1d", 2), + "negs", "r!0d,r!1d", 2), ENCODING_MAP(kThumb2OrrRRR, 0xea400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, @@ -774,15 +774,15 @@ ArmEncodingMap EncodingMap[kArmLast] = { "it:!1b", "!0c", 1), ENCODING_MAP(kThumb2Fmstat, 0xeef1fa10, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES, + kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES | USES_FPSTATUS, "fmstat", "", 2), ENCODING_MAP(kThumb2Vcmpd, 0xeeb40b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_FPSTATUS, "vcmp.f64", "!0S, !1S", 2), ENCODING_MAP(kThumb2Vcmps, 0xeeb40a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_FPSTATUS, "vcmp.f32", "!0s, !1s", 2), ENCODING_MAP(kThumb2LdrPcRel12, 0xf8df0000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, @@ -881,11 +881,13 @@ ArmEncodingMap EncodingMap[kArmLast] = { kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD, "ldr", "r!0d, [r15pc, -#!1d]", 2), +#ifndef WITH_QC_PERF ENCODING_MAP(kThumb2RsbRRR, 0xebd00000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, "rsb", "r!0d, r!1d, r!2d!3H", 2), +#endif ENCODING_MAP(kThumbUndefined, 0xde00, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND, @@ -929,6 +931,24 @@ static void installLiteralPools(CompilationUnit *cUnit) } /* + * Return the encoding map for the specified ARM opcode + */ +__attribute__((weak)) ArmEncodingMap* getEncoding(ArmOpcode opcode) { + if (opcode > kThumbUndefined) { + opcode = kThumbUndefined; + } + ArmEncodingMap* encoder = &EncodingMap[opcode]; + return encoder; +} + +/* process more ARM encodings */ +__attribute__((weak)) u4 processMoreEncodings(ArmEncodingMap* encoder, int pos, u4 operand) +{ + assert(0); + return 0; +} + +/* * Assemble the LIR into binary instruction format. 
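The EncodingMap fixes above add SETS_FPSTATUS/USES_FPSTATUS so that vcmp (which writes FPSCR) and fmstat (which reads it) carry an explicit def/use dependency and cannot be scheduled apart. The underlying mechanism is plain bitmask intersection over per-instruction resource masks; a toy version under assumed bit assignments (the real masks live in ArmLIR's defMask/useMask with ENCODE_FP_STATUS):

```cpp
#include <cstdint>

// One bit per machine resource; FP status gets its own bit so the
// scheduler sees vcmp -> fmstat as a true dependency.
enum : uint64_t { ENCODE_R0 = 1ull << 0, ENCODE_FP_STATUS = 1ull << 40 };

struct ToyLIR { uint64_t defMask; uint64_t useMask; };

// Two instructions must stay ordered if one defines a resource the
// other uses or defines.
bool mustStayOrdered(const ToyLIR& a, const ToyLIR& b) {
    return (a.defMask & (b.useMask | b.defMask)) != 0 ||
           (b.defMask & a.useMask) != 0;
}

int main() {
    ToyLIR vcmp   = { ENCODE_FP_STATUS, ENCODE_R0 };  // vcmp: writes FPSCR
    ToyLIR fmstat = { 0, ENCODE_FP_STATUS };          // fmstat: reads FPSCR
    return mustStayOrdered(vcmp, fmstat) ? 0 : 1;     // dependency detected
}
```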
Note that we may * discover that pc-relative displacements may not fit the selected * instruction. In those cases we will try to substitute a new code @@ -1073,7 +1093,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit, NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; } - ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; + ArmEncodingMap* encoder = getEncoding(lir->opcode); u4 bits = encoder->skeleton; int i; for (i = 0; i < 4; i++) { @@ -1128,7 +1148,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit, case kFmtDfp: { assert(DOUBLEREG(operand)); assert((operand & 0x1) == 0); - int regName = (operand & FP_REG_MASK) >> 1; + int regName = (operand - FP_DOUBLE - FP_REG_OFFSET) / 2; /* Snag the 1-bit slice and position it */ value = ((regName & 0x10) >> 4) << encoder->fieldLoc[i].end; @@ -1163,7 +1183,8 @@ static AssemblerStatus assembleInstructions(CompilationUnit *cUnit, bits |= value; break; default: - assert(0); + bits |= processMoreEncodings(encoder, i, operand); + break; } } if (encoder->size == 2) { @@ -1296,10 +1317,10 @@ static inline u4 getChainCellSize(const ChainCellCounts* pChainCellCounts) for (i = 0; i < kChainingCellGap; i++) { if (i != kChainingCellInvokePredicted) { cellSize += pChainCellCounts->u.count[i] * - (CHAIN_CELL_NORMAL_SIZE >> 2); + ((CHAIN_CELL_NORMAL_SIZE >> 2)+pChainCellCounts->extraSize); } else { cellSize += pChainCellCounts->u.count[i] * - (CHAIN_CELL_PREDICTED_SIZE >> 2); + ((CHAIN_CELL_PREDICTED_SIZE >> 2)+pChainCellCounts->extraSize); } } return cellSize; @@ -1378,7 +1399,9 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) armLIR = NEXT_LIR(armLIR)) { armLIR->generic.offset = offset; if (armLIR->opcode >= 0 && !armLIR->flags.isNop) { - armLIR->flags.size = EncodingMap[armLIR->opcode].size * 2; + armLIR->flags.size = getEncoding(armLIR->opcode)->size * 2; + if(armLIR->flags.size==0) + armLIR->flags.isNop=true; offset += armLIR->flags.size; } else if (armLIR->opcode == kArmPseudoPseudoAlign4) { if (offset & 0x2) { @@ -1521,6 +1544,9 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) /* Set the gap number in the chaining cell count structure */ chainCellCounts.u.count[kChainingCellGap] = chainingCellGap; + assert((cUnit->chainingCellExtraSize & 0x3) ==0); + chainCellCounts.extraSize = cUnit->chainingCellExtraSize >> 2; + memcpy((char*)cUnit->baseAddr + chainCellOffset, &chainCellCounts, sizeof(chainCellCounts)); @@ -1558,7 +1584,7 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) */ static u4 getSkeleton(ArmOpcode op) { - return EncodingMap[op].skeleton; + return getEncoding(op)->skeleton; } static u4 assembleChainingBranch(int branchOffset, bool thumbTarget) @@ -1916,6 +1942,7 @@ static u4* unchainSingle(JitEntry *trace) } for (j = 0; j < pChainCellCounts->u.count[i]; j++) { + pChainCells += pChainCellCounts->extraSize; switch(i) { case kChainingCellNormal: case kChainingCellHot: @@ -2200,13 +2227,14 @@ static void findClassPointersSingleTrace(char *base, void (*callback)(void *)) chainTypeIdx++) { if (chainTypeIdx != kChainingCellInvokePredicted) { /* In 32-bit words */ - pChainCells += (CHAIN_CELL_NORMAL_SIZE >> 2) * + pChainCells += ((CHAIN_CELL_NORMAL_SIZE >> 2)+pChainCellCounts->extraSize) * pChainCellCounts->u.count[chainTypeIdx]; continue; } for (chainIdx = 0; chainIdx < pChainCellCounts->u.count[chainTypeIdx]; chainIdx++) { + pChainCells += pChainCellCounts->extraSize; PredictedChainingCell *cell = 
(PredictedChainingCell *) pChainCells; /* diff --git a/vm/compiler/codegen/arm/Codegen.h b/vm/compiler/codegen/arm/Codegen.h index 7ec921770..873648cc9 100644 --- a/vm/compiler/codegen/arm/Codegen.h +++ b/vm/compiler/codegen/arm/Codegen.h @@ -25,6 +25,8 @@ #include "compiler/CompilerIR.h" #include "CalloutHelper.h" +#include "compiler/codegen/PostOptimizer.h" + #if defined(_CODEGEN_C) /* * loadConstant() sometimes needs to add a small imm to a pre-existing constant diff --git a/vm/compiler/codegen/arm/CodegenCommon.cpp b/vm/compiler/codegen/arm/CodegenCommon.cpp index 5c02678fe..52ae87aa4 100644 --- a/vm/compiler/codegen/arm/CodegenCommon.cpp +++ b/vm/compiler/codegen/arm/CodegenCommon.cpp @@ -36,7 +36,7 @@ static void setMemRefType(ArmLIR *lir, bool isLoad, int memType) { u8 *maskPtr; u8 mask = ENCODE_MEM;; - assert(EncodingMap[lir->opcode].flags & (IS_LOAD | IS_STORE)); + assert(getEncoding(lir->opcode)->flags & (IS_LOAD | IS_STORE)); if (isLoad) { maskPtr = &lir->useMask; } else { @@ -58,7 +58,7 @@ static void setMemRefType(ArmLIR *lir, bool isLoad, int memType) break; case kMustNotAlias: /* Currently only loads can be marked as kMustNotAlias */ - assert(!(EncodingMap[lir->opcode].flags & IS_STORE)); + assert(!(getEncoding(lir->opcode)->flags & IS_STORE)); *maskPtr |= ENCODE_MUST_NOT_ALIAS; break; default: @@ -94,7 +94,6 @@ static inline u8 getRegMaskCommon(int reg) u8 seed; int shift; int regId = reg & 0x1f; - /* * Each double register is equal to a pair of single-precision FP registers */ @@ -120,6 +119,16 @@ static inline void setupRegMask(u8 *mask, int reg) *mask |= getRegMaskCommon(reg); } +/* skip certain def masks */ +__attribute__((weak)) bool skipDefRegMasks(ArmLIR *lir) { + return false; +} + +/* skip certain use masks */ +__attribute__((weak)) bool skipUseRegMasks(ArmLIR *lir) { + return false; +} + /* * Set up the proper fields in the resource mask */ @@ -133,7 +142,7 @@ static void setupResourceMasks(ArmLIR *lir) return; } - flags = EncodingMap[lir->opcode].flags; + flags = getEncoding(lir->opcode)->flags; /* Set up the mask for resources that are updated */ if (flags & (IS_LOAD | IS_STORE)) { @@ -150,12 +159,14 @@ static void setupResourceMasks(ArmLIR *lir) return; } - if (flags & REG_DEF0) { - setupRegMask(&lir->defMask, lir->operands[0]); - } + if (!skipDefRegMasks(lir)) { + if (flags & REG_DEF0) { + setupRegMask(&lir->defMask, lir->operands[0]); + } - if (flags & REG_DEF1) { - setupRegMask(&lir->defMask, lir->operands[1]); + if (flags & REG_DEF1) { + setupRegMask(&lir->defMask, lir->operands[1]); + } } if (flags & REG_DEF_SP) { @@ -178,6 +189,10 @@ static void setupResourceMasks(ArmLIR *lir) lir->defMask |= ENCODE_CCODE; } + if (flags & SETS_FPSTATUS) { + lir->defMask |= ENCODE_FP_STATUS; + } + /* Conservatively treat the IT block */ if (flags & IS_IT) { lir->defMask = ENCODE_ALL; @@ -185,10 +200,11 @@ static void setupResourceMasks(ArmLIR *lir) if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) { int i; - - for (i = 0; i < 4; i++) { - if (flags & (1 << (kRegUse0 + i))) { - setupRegMask(&lir->useMask, lir->operands[i]); + if (!skipUseRegMasks(lir)) { + for (i = 0; i < 4; i++) { + if (flags & (1 << (kRegUse0 + i))) { + setupRegMask(&lir->useMask, lir->operands[i]); + } } } } @@ -213,6 +229,10 @@ static void setupResourceMasks(ArmLIR *lir) lir->useMask |= ENCODE_CCODE; } + if (flags & USES_FPSTATUS) { + lir->useMask |= ENCODE_FP_STATUS; + } + /* Fixup for kThumbPush/lr and kThumbPop/pc */ if (opcode == kThumbPush || opcode == kThumbPop) { u8 r8Mask = 
getRegMaskCommon(r8); @@ -231,7 +251,7 @@ static void setupResourceMasks(ArmLIR *lir) */ static void relaxBranchMasks(ArmLIR *lir) { - int flags = EncodingMap[lir->opcode].flags; + int flags = getEncoding(lir->opcode)->flags; /* Make sure only branch instructions are passed here */ assert(flags & IS_BRANCH); @@ -264,7 +284,7 @@ static void relaxBranchMasks(ArmLIR *lir) static ArmLIR *newLIR0(CompilationUnit *cUnit, ArmOpcode opcode) { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); - assert(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & NO_OPERAND)); + assert(isPseudoOpcode(opcode) || (getEncoding(opcode)->flags & NO_OPERAND)); insn->opcode = opcode; setupResourceMasks(insn); dvmCompilerAppendLIR(cUnit, (LIR *) insn); @@ -275,7 +295,7 @@ static ArmLIR *newLIR1(CompilationUnit *cUnit, ArmOpcode opcode, int dest) { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); - assert(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_UNARY_OP)); + assert(isPseudoOpcode(opcode) || (getEncoding(opcode)->flags & IS_UNARY_OP)); insn->opcode = opcode; insn->operands[0] = dest; setupResourceMasks(insn); @@ -288,7 +308,7 @@ static ArmLIR *newLIR2(CompilationUnit *cUnit, ArmOpcode opcode, { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); assert(isPseudoOpcode(opcode) || - (EncodingMap[opcode].flags & IS_BINARY_OP)); + (getEncoding(opcode)->flags & IS_BINARY_OP)); insn->opcode = opcode; insn->operands[0] = dest; insn->operands[1] = src1; @@ -301,11 +321,11 @@ static ArmLIR *newLIR3(CompilationUnit *cUnit, ArmOpcode opcode, int dest, int src1, int src2) { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); - if (!(EncodingMap[opcode].flags & IS_TERTIARY_OP)) { + if (!(getEncoding(opcode)->flags & IS_TERTIARY_OP)) { ALOGE("Bad LIR3: %s[%d]",EncodingMap[opcode].name,opcode); } assert(isPseudoOpcode(opcode) || - (EncodingMap[opcode].flags & IS_TERTIARY_OP)); + (getEncoding(opcode)->flags & IS_TERTIARY_OP)); insn->opcode = opcode; insn->operands[0] = dest; insn->operands[1] = src1; @@ -317,11 +337,11 @@ static ArmLIR *newLIR3(CompilationUnit *cUnit, ArmOpcode opcode, #if defined(_ARMV7_A) || defined(_ARMV7_A_NEON) static ArmLIR *newLIR4(CompilationUnit *cUnit, ArmOpcode opcode, - int dest, int src1, int src2, int info) + int dest, int src1, int src2, int info) { ArmLIR *insn = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); assert(isPseudoOpcode(opcode) || - (EncodingMap[opcode].flags & IS_QUAD_OP)); + (getEncoding(opcode)->flags & IS_QUAD_OP)); insn->opcode = opcode; insn->operands[0] = dest; insn->operands[1] = src1; diff --git a/vm/compiler/codegen/arm/CodegenDriver.cpp b/vm/compiler/codegen/arm/CodegenDriver.cpp index de53b00fb..541427592 100644 --- a/vm/compiler/codegen/arm/CodegenDriver.cpp +++ b/vm/compiler/codegen/arm/CodegenDriver.cpp @@ -297,9 +297,17 @@ static inline ArmLIR *genTrap(CompilationUnit *cUnit, int dOffset, return genCheckCommon(cUnit, dOffset, branch, pcrLabel); } +__attribute__((weak)) bool genIGetWideThumb2(CompilationUnit *cUnit, MIR *mir, int fieldOffset) +{ + return false; +} + /* Load a wide field from an object instance */ static void genIGetWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) { + if (genIGetWideThumb2(cUnit, mir, fieldOffset)) + return; + RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0); RegLocation rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); RegLocation rlResult; @@ -321,9 +329,17 @@ static void genIGetWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) storeValueWide(cUnit, 
rlDest, rlResult); } +__attribute__((weak)) bool genIPutWideThumb2(CompilationUnit *cUnit, MIR *mir, int fieldOffset) +{ + return false; +} + /* Store a wide field to an object instance */ static void genIPutWide(CompilationUnit *cUnit, MIR *mir, int fieldOffset) { + if (genIPutWideThumb2(cUnit, mir, fieldOffset)) + return; + RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1); RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 2); rlObj = loadValue(cUnit, rlObj, kCoreReg); @@ -398,6 +414,13 @@ static void genIPut(CompilationUnit *cUnit, MIR *mir, OpSize size, } } +#ifdef WITH_QC_PERF +__attribute__((weak)) bool genArrayGetThumb2(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlDest, int scale) +{ + return false; +} /* * Generate array load @@ -406,6 +429,10 @@ static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size, RegLocation rlArray, RegLocation rlIndex, RegLocation rlDest, int scale) { + if(genArrayGetThumb2(cUnit, mir, size, rlArray, rlIndex, + rlDest, scale)) + return; + RegisterClass regClass = dvmCompilerRegClassBySize(size); int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); @@ -467,6 +494,13 @@ static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size, } } +__attribute__((weak)) bool genArrayPutThumb2(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlSrc, int scale) +{ + return false; +} + /* * Generate array store * @@ -475,6 +509,10 @@ static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size, RegLocation rlArray, RegLocation rlIndex, RegLocation rlSrc, int scale) { + if(genArrayPutThumb2(cUnit, mir, size, rlArray, rlIndex, + rlSrc, scale)) + return; + RegisterClass regClass = dvmCompilerRegClassBySize(size); int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); @@ -540,6 +578,7 @@ static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size, HEAP_ACCESS_SHADOW(false); } } +#endif /* * Generate array object store @@ -673,6 +712,7 @@ static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, bool checkZero = false; void *callTgt; int retReg = r0; + bool setCCode = false; switch (mir->dalvikInsn.opcode) { case OP_NOT_LONG: @@ -687,11 +727,13 @@ static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, case OP_ADD_LONG_2ADDR: firstOp = kOpAdd; secondOp = kOpAdc; + setCCode = true; break; case OP_SUB_LONG: case OP_SUB_LONG_2ADDR: firstOp = kOpSub; secondOp = kOpSbc; + setCCode = true; break; case OP_MUL_LONG: case OP_MUL_LONG_2ADDR: @@ -733,8 +775,10 @@ static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); loadConstantNoClobber(cUnit, tReg, 0); + SET_CCODE; opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc2.lowReg); + UNSET_CCODE; opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg); genRegCopy(cUnit, rlResult.highReg, tReg); storeValueWide(cUnit, rlDest, rlResult); @@ -745,7 +789,7 @@ static bool genArithOpLong(CompilationUnit *cUnit, MIR *mir, dvmCompilerAbort(cUnit); } if (!callOut) { - genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2); + genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2, setCCode); } else { // Adjust return regs in to handle case of rem returning r2/r3 dvmCompilerFlushAllRegs(cUnit); /* Send everything to home location */ @@ 
-1078,7 +1122,9 @@ static void genProcessArgsRange(CompilationUnit *cUnit, MIR *mir, loadMultiple(cUnit, r4PC, regMask); /* No need to generate the loop structure if numArgs <= 11 */ if (numArgs > 11) { + SET_CCODE; opRegImm(cUnit, kOpSub, r5FP, 4); + UNSET_CCODE; genConditionalBranch(cUnit, kArmCondNe, loopLabel); } } @@ -1449,6 +1495,20 @@ static void genSuspendPoll(CompilationUnit *cUnit, MIR *mir) genRegImmCheck(cUnit, kArmCondNe, rTemp, 0, mir->offset, NULL); } +__attribute__((weak)) void dvmGenSuspendPoll(CompilationUnit *cUnit, + BasicBlock *bb, + MIR *mir, + bool genSuspendPollEnabled) +{ + /* backward branch? */ + bool backwardBranch = (bb->taken->startOffset <= mir->offset); + + if (backwardBranch && + (genSuspendPollEnabled || cUnit->jitMode == kJitLoop)) { + genSuspendPoll(cUnit, mir); + } +} + /* * The following are the first-level codegen routines that analyze the format * of each bytecode then either dispatch special purpose codegen routines @@ -1458,13 +1518,7 @@ static void genSuspendPoll(CompilationUnit *cUnit, MIR *mir) static bool handleFmt10t_Fmt20t_Fmt30t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, ArmLIR *labelList) { - /* backward branch? */ - bool backwardBranch = (bb->taken->startOffset <= mir->offset); - - if (backwardBranch && - (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) { - genSuspendPoll(cUnit, mir); - } + dvmGenSuspendPoll(cUnit, bb, mir, gDvmJit.genSuspendPoll); int numPredecessors = dvmCountSetBits(bb->taken->predecessors); /* @@ -1530,10 +1584,17 @@ static bool handleFmt11n_Fmt31i(CompilationUnit *cUnit, MIR *mir) case OP_CONST_WIDE_32: { //TUNING: single routine to load constant pair for support doubles //TUNING: load 0/-1 separately to avoid load dependency - rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB); - opRegRegImm(cUnit, kOpAsr, rlResult.highReg, - rlResult.lowReg, 31); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + if(FPREG(rlResult.lowReg)){ + /* if the constant is FP, use VFP register to hold it */ + loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, + mir->dalvikInsn.vB, + ((mir->dalvikInsn.vB)&0x80000000) == 0x80000000? -1:0); + }else{ + loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB); + opRegRegImm(cUnit, kOpAsr, rlResult.highReg, + rlResult.lowReg, 31); + } storeValueWide(cUnit, rlDest, rlResult); break; } @@ -2081,10 +2142,17 @@ static bool handleFmt21s(CompilationUnit *cUnit, MIR *mir) int BBBB = mir->dalvikInsn.vB; if (dalvikOpcode == OP_CONST_WIDE_16) { rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); - rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - loadConstantNoClobber(cUnit, rlResult.lowReg, BBBB); - //TUNING: do high separately to avoid load dependency - opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31); + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + if(FPREG(rlResult.lowReg)){ + /* if the constant is FP, use VFP register to hold it */ + loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, + mir->dalvikInsn.vB, + ((mir->dalvikInsn.vB)&0x80000000) == 0x80000000? 
-1:0); +}else{ + loadConstantNoClobber(cUnit, rlResult.lowReg, BBBB); + //TUNING: do high separately to avoid load dependency + opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31); + } storeValueWide(cUnit, rlDest, rlResult); } else if (dalvikOpcode == OP_CONST_16) { rlDest = dvmCompilerGetDest(cUnit, mir, 0); @@ -2102,13 +2170,7 @@ static bool handleFmt21t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, { Opcode dalvikOpcode = mir->dalvikInsn.opcode; ArmConditionCode cond; - /* backward branch? */ - bool backwardBranch = (bb->taken->startOffset <= mir->offset); - - if (backwardBranch && - (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) { - genSuspendPoll(cUnit, mir); - } + dvmGenSuspendPoll(cUnit, bb, mir, gDvmJit.genSuspendPoll); RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0); rlSrc = loadValue(cUnit, rlSrc, kCoreReg); @@ -2257,7 +2319,14 @@ static bool handleEasyMultiply(CompilationUnit *cUnit, } else { // Reverse subtract: (src << (shift + 1)) - src. assert(powerOfTwoMinusOne); +#ifdef WITH_QC_PERF + // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1) + int tReg = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1)); + opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg); +#else genMultiplyByShiftAndReverseSubtract(cUnit, rlSrc, rlResult, lowestSetBit(lit + 1)); +#endif } storeValue(cUnit, rlDest, rlResult); return true; @@ -2598,13 +2667,7 @@ static bool handleFmt22t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, { Opcode dalvikOpcode = mir->dalvikInsn.opcode; ArmConditionCode cond; - /* backward branch? */ - bool backwardBranch = (bb->taken->startOffset <= mir->offset); - - if (backwardBranch && - (gDvmJit.genSuspendPoll || cUnit->jitMode == kJitLoop)) { - genSuspendPoll(cUnit, mir); - } + dvmGenSuspendPoll(cUnit, bb, mir, gDvmJit.genSuspendPoll); RegLocation rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 0); RegLocation rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 1); @@ -2669,6 +2732,17 @@ static bool handleFmt22x_Fmt32x(CompilationUnit *cUnit, MIR *mir) return false; } +/* + * Utility function to check the DEX opcode in the MIR + */ +__attribute__((weak)) bool isInvalidMIR(CompilationUnit *cUnit, MIR *mir) +{ + bool result = false; + + return result; +} + + static bool handleFmt23x(CompilationUnit *cUnit, MIR *mir) { Opcode opcode = mir->dalvikInsn.opcode; @@ -3484,7 +3558,9 @@ static bool genInlinedStringIsEmptyOrLength(CompilationUnit *cUnit, MIR *mir, if (isEmpty) { // dst = (dst == 0); int tReg = dvmCompilerAllocTemp(cUnit); + SET_CCODE; opRegReg(cUnit, kOpNeg, tReg, rlResult.lowReg); + UNSET_CCODE; opRegRegReg(cUnit, kOpAdc, rlResult.lowReg, rlResult.lowReg, tReg); } storeValue(cUnit, rlDest, rlResult); @@ -3561,7 +3637,9 @@ static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir) * mechanism for now. */ opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.highReg, 31); + SET_CCODE; opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg); + UNSET_CCODE; opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc.highReg, signReg); opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg); opRegReg(cUnit, kOpXor, rlResult.highReg, signReg); @@ -3587,6 +3665,12 @@ static bool genInlinedLongDoubleConversion(CompilationUnit *cUnit, MIR *mir) return false; } +__attribute__((weak)) int getInlineTableFunc(int operation) +{ + const InlineOperation* inLineTable = dvmGetInlineOpsTable(); + return ((int)inLineTable[operation].func); +} + /* * JITs a call to a C function.
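A convention runs through the weak hooks in this file: returning false means "not handled here, fall through to the portable code path" (genIGetWideThumb2, genArrayGetThumb2 and friends), while isInvalidMIR() returning true tells the dispatch loop the MIR was dealt with, so a strong version is expected to emit its own fallback before saying so. An illustrative strong override, with opcodes chosen for the example only:

    /* Hypothetical override: refuse to JIT selected long-division opcodes,
     * single-stepping them through the interpreter instead. The dispatch
     * loop marks the MIR handled when this returns true. */
    bool isInvalidMIR(CompilationUnit *cUnit, MIR *mir)
    {
        switch (mir->dalvikInsn.opcode) {
        case OP_DIV_LONG:
        case OP_DIV_LONG_2ADDR:
            genInterpSingleStep(cUnit, mir);  /* emit fallback code first */
            return true;
        default:
            return false;
        }
    }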
* TODO: use this for faster native method invocation for simple native @@ -3597,8 +3681,7 @@ static bool handleExecuteInlineC(CompilationUnit *cUnit, MIR *mir) DecodedInstruction *dInsn = &mir->dalvikInsn; int operation = dInsn->vB; unsigned int i; - const InlineOperation* inLineTable = dvmGetInlineOpsTable(); - uintptr_t fn = (int) inLineTable[operation].func; + uintptr_t fn = getInlineTableFunc(operation); if (fn == 0) { dvmCompilerAbort(cUnit); } @@ -3608,14 +3691,55 @@ static bool handleExecuteInlineC(CompilationUnit *cUnit, MIR *mir) dvmCompilerClobber(cUnit, r7); int offset = offsetof(Thread, interpSave.retval); opRegRegImm(cUnit, kOpAdd, r4PC, r6SELF, offset); +#ifdef INLINE_ARG_EXPANDED + switch( dInsn->vA ){ + case 7: + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, 6), r7); + opImm(cUnit, kOpPush, (1<<r7)); + /* fall through */ + case 6: + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, 5), r7); + opImm(cUnit, kOpPush, (1<<r7)); + /* fall through */ + case 5: + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, 4), r7); + } + opImm(cUnit, kOpPush, (1<<r4PC) | (1<<r7)); + LOAD_FUNC_ADDR(cUnit, r4PC, fn); + genExportPC(cUnit, mir); +#else opImm(cUnit, kOpPush, (1<<r4PC) | (1<<r7)); LOAD_FUNC_ADDR(cUnit, r4PC, fn); genExportPC(cUnit, mir); +#endif + +#ifdef INLINE_ARG_EXPANDED + if( dInsn->vA >= 5 ){ + for (i=0; i < 4; i++) { + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, i), i); + } + } else { + for (i=0; i < dInsn->vA; i++) { + loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, i), i); + } + } +#else for (i=0; i < dInsn->vA; i++) { loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, i), i); } +#endif opReg(cUnit, kOpBlx, r4PC); +#ifdef INLINE_ARG_EXPANDED + if( dInsn->vA == 7 ){ + opRegImm(cUnit, kOpAdd, r13sp, 16); + } else if( dInsn->vA == 6 ){ + opRegImm(cUnit, kOpAdd, r13sp, 12); + } else { + opRegImm(cUnit, kOpAdd, r13sp, 8); + } +#else opRegImm(cUnit, kOpAdd, r13sp, 8); +#endif /* NULL? */ ArmLIR *branchOver = genCmpImmBranch(cUnit, kArmCondNe, r0, 0); loadConstant(cUnit, r0, (int) (cUnit->method->insns + mir->offset)); @@ -3692,6 +3816,8 @@ static bool handleExecuteInline(CompilationUnit *cUnit, MIR *mir) case INLINE_DOUBLE_TO_LONG_BITS: return handleExecuteInlineC(cUnit, mir); } + return handleExecuteInlineC(cUnit, mir); + dvmCompilerAbort(cUnit); return false; // Not reachable; keeps compiler happy. 
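The INLINE_ARG_EXPANDED path above keeps the original calling convention for the first four arguments (r0-r3) and spills the rest: argument 4 rides in r7 inside the existing (1<<r4PC)|(1<<r7) pair push, while arguments 5 and 6 each get a push of their own, which is why the post-call adjustment of r13sp grows from 8 to 12 or 16 bytes. The invariant as a small C sketch (function name hypothetical):

    /* Bytes to pop after the inline call, given vA inlined arguments.
     * 8 covers the r4PC/r7 pair push; each argument past the fifth adds
     * one extra 4-byte push. Matches the kOpAdd on r13sp above. */
    static int inlineSpBytes(unsigned vA)
    {
        int extra = (vA > 5) ? (int)(vA - 5) : 0;  /* args 5 and 6 */
        return 8 + 4 * extra;   /* vA==6 -> 12, vA==7 -> 16, else 8 */
    }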
} @@ -3700,10 +3826,9 @@ static bool handleFmt51l(CompilationUnit *cUnit, MIR *mir) { //TUNING: We're using core regs here - not optimal when target is a double RegLocation rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1); - RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); - loadConstantNoClobber(cUnit, rlResult.lowReg, - mir->dalvikInsn.vB_wide & 0xFFFFFFFFUL); - loadConstantNoClobber(cUnit, rlResult.highReg, + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true); + loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, + mir->dalvikInsn.vB_wide & 0xFFFFFFFFUL, (mir->dalvikInsn.vB_wide>>32) & 0xFFFFFFFFUL); storeValueWide(cUnit, rlDest, rlResult); return false; @@ -3825,6 +3950,31 @@ static void handleInvokePredictedChainingCell(CompilationUnit *cUnit) addWordData(cUnit, NULL, PREDICTED_CHAIN_COUNTER_INIT); } +static void handlePCReconstructionExtended(CompilationUnit *cUnit) +{ + ArmLIR **pcrLabel = + (ArmLIR **) cUnit->pcReconstructionListExtended.elemList; + int numElems = cUnit->pcReconstructionListExtended.numUsed; + int i; + ArmLIR *exceptionBlock; + if(numElems>0){ + exceptionBlock = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true); + exceptionBlock->opcode = kArmPseudoEHBlockLabel; + for (i = 0; i < numElems; i++) { + dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]); + /* r0 = dalvik PC */ + loadConstant(cUnit, r0, pcrLabel[i]->operands[0]); + genUnconditionalBranch(cUnit, exceptionBlock); + } + /* append exception block after pcReconstruction blocks */ + dvmCompilerAppendLIR(cUnit, (LIR *) exceptionBlock); + loadWordDisp(cUnit, r6SELF, offsetof(Thread, + jitToInterpEntries.dvmJitToInterpPunt), + r1); + opReg(cUnit, kOpBlx, r1); + } +} + /* Load the Dalvik PC into r0 and jump to the specified target */ static void handlePCReconstruction(CompilationUnit *cUnit, ArmLIR *targetLabel) @@ -3842,6 +3992,9 @@ static void handlePCReconstruction(CompilationUnit *cUnit, newLIR0(cUnit, kThumbUndefined); } + /* handle pcReconstruction for extended MIRs */ + handlePCReconstructionExtended(cUnit); + for (i = 0; i < numElems; i++) { dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]); /* r0 = dalvik PC */ @@ -3859,6 +4012,13 @@ static const char *extendedMIROpNames[kMirOpLast - kMirOpFirst] = { "kMirOpCheckInlinePrediction", }; + +__attribute__((weak)) bool genHoistedChecksForCountUpLoopThumb(CompilationUnit *cUnit, + MIR *mir) +{ + return false; +} + /* * vA = arrayReg; * vB = idxReg; @@ -3869,6 +4029,8 @@ static const char *extendedMIROpNames[kMirOpLast - kMirOpFirst] = { */ static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) { + if(genHoistedChecksForCountUpLoopThumb(cUnit, mir)) + return; /* * NOTE: these synthesized blocks don't have ssa names assigned * for Dalvik registers.
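handlePCReconstructionExtended() deduplicates the exception exit: every extended reconstruction cell only materializes its Dalvik PC in r0 and branches to one shared EH block, and that block alone loads dvmJitToInterpPunt from the thread's jit-to-interp table and calls it. The layout it appends, sketched as a comment:

    /* Appended layout (sketch):
     *   pcr_cell_0:  r0 = dalvikPC_0;  b eh_block
     *   pcr_cell_1:  r0 = dalvikPC_1;  b eh_block
     *   ...
     *   eh_block:    r1 = self->jitToInterpEntries.dvmJitToInterpPunt;
     *                blx r1;           // one punt shared by all cells
     */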
However, because they dominate the following @@ -3885,9 +4047,11 @@ static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) /* regArray <- arrayRef */ rlArray = loadValue(cUnit, rlArray, kCoreReg); rlIdxEnd = loadValue(cUnit, rlIdxEnd, kCoreReg); - genRegImmCheck(cUnit, kArmCondEq, rlArray.lowReg, 0, 0, - (ArmLIR *) cUnit->loopAnalysis->branchToPCR); - + if (!dvmIsBitSet(cUnit->regPool->nullCheckedRegs, mir->dalvikInsn.vA)){ + dvmSetBit(cUnit->regPool->nullCheckedRegs, mir->dalvikInsn.vA); + genRegImmCheck(cUnit, kArmCondEq, rlArray.lowReg, 0, 0, + (ArmLIR *) cUnit->loopAnalysis->branchToPCR); + } /* regLength <- len(arrayRef) */ regLength = dvmCompilerAllocTemp(cUnit); loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLength); @@ -3912,6 +4076,11 @@ static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) (ArmLIR *) cUnit->loopAnalysis->branchToPCR); } +__attribute__((weak)) bool genHoistedChecksForCountDownLoopThumb(CompilationUnit *cUnit, + MIR *mir) +{ + return false; +} /* * vA = arrayReg; * vB = idxReg; @@ -3922,6 +4091,9 @@ static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) */ static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) { + if(genHoistedChecksForCountDownLoopThumb(cUnit, mir)) + return; + DecodedInstruction *dInsn = &mir->dalvikInsn; const int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); const int regLength = dvmCompilerAllocTemp(cUnit); @@ -3932,8 +4104,11 @@ static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) /* regArray <- arrayRef */ rlArray = loadValue(cUnit, rlArray, kCoreReg); rlIdxInit = loadValue(cUnit, rlIdxInit, kCoreReg); - genRegImmCheck(cUnit, kArmCondEq, rlArray.lowReg, 0, 0, - (ArmLIR *) cUnit->loopAnalysis->branchToPCR); + if (!dvmIsBitSet(cUnit->regPool->nullCheckedRegs, mir->dalvikInsn.vA)){ + dvmSetBit(cUnit->regPool->nullCheckedRegs, mir->dalvikInsn.vA); + genRegImmCheck(cUnit, kArmCondEq, rlArray.lowReg, 0, 0, + (ArmLIR *) cUnit->loopAnalysis->branchToPCR); + } /* regLength <- len(arrayRef) */ loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLength); @@ -3950,12 +4125,20 @@ static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) (ArmLIR *) cUnit->loopAnalysis->branchToPCR); } +__attribute__((weak)) bool genHoistedLowerBoundCheckThumb(CompilationUnit *cUnit, + MIR *mir) +{ + return false; +} /* * vA = idxReg; * vB = minC; */ static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir) { + if(genHoistedLowerBoundCheckThumb(cUnit, mir)) + return; + DecodedInstruction *dInsn = &mir->dalvikInsn; const int minC = dInsn->vB; RegLocation rlIdx = cUnit->regLocation[mir->dalvikInsn.vA]; @@ -4125,12 +4308,12 @@ static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry, { /* Set up the place holder to reconstruct this Dalvik PC */ ArmLIR *pcrLabel = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); - pcrLabel->opcode = kArmPseudoPCReconstructionCell; + pcrLabel->opcode = kArmPseudoPCReconstructionCellExtended; pcrLabel->operands[0] = (int) (cUnit->method->insns + entry->startOffset); pcrLabel->operands[1] = entry->startOffset; /* Insert the place holder to the growable list */ - dvmInsertGrowableList(&cUnit->pcReconstructionList, (intptr_t) pcrLabel); + dvmInsertGrowableList(&cUnit->pcReconstructionListExtended, (intptr_t)pcrLabel); /* * Next, create two branches - one branch over to the loop body and the @@ -4176,20 +4359,40 @@ static bool selfVerificationPuntOps(MIR *mir) } #endif 
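Both hoisted-check routines above now consult cUnit->regPool->nullCheckedRegs before emitting the array null check, so an array register already proven non-null in this trace is not checked again by a later hoisted block. The test-and-set idiom in isolation (helper name hypothetical; dvmIsBitSet/dvmSetBit are the VM's BitVector API):

    /* True exactly once per Dalvik register per trace; the caller emits
     * the genRegImmCheck() null check only on that first sighting. */
    static bool firstNullCheck(BitVector *nullCheckedRegs, int vReg)
    {
        if (dvmIsBitSet(nullCheckedRegs, vReg))
            return false;           /* already null-checked in this trace */
        dvmSetBit(nullCheckedRegs, vReg);
        return true;
    }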
+__attribute__((weak)) void dvmCompilerCheckStats(CompilationUnit *cUnit) +{ + if (cUnit->printMe){ + ALOGV("extra size in ChainingCells: %d",cUnit->chainingCellExtraSize); + ALOGV("number of extended PCReconstruction cells: %d", + cUnit->pcReconstructionListExtended.numUsed); + } +} + +__attribute__((weak)) void dvmCompilerCheckBlockStats(CompilationUnit *cUnit, BasicBlock *bb) +{ + if(cUnit->printMe){ + ALOGV("Current block:%d",bb->id); + if(bb->taken) + ALOGV("Next taken block:%d", bb->taken->id); + if(bb->fallThrough) + ALOGV("Next fallThrough block:%d",bb->fallThrough->id); + } +} + void dvmCompilerMIR2LIR(CompilationUnit *cUnit) { /* Used to hold the labels of each block */ - ArmLIR *labelList = - (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR) * cUnit->numBlocks, true); + cUnit->labelList = + (void *) dvmCompilerNew(sizeof(ArmLIR) * cUnit->numBlocks, true); + ArmLIR *labelList = (ArmLIR *)(cUnit->labelList); ArmLIR *headLIR = NULL; - GrowableList chainingListByType[kChainingCellGap]; int i; /* * Initialize various types chaining lists. */ for (i = 0; i < kChainingCellGap; i++) { - dvmInitGrowableList(&chainingListByType[i], 2); + dvmInitGrowableList(&(cUnit->chainingListByType[i]), 2); } /* Clear the visited flag for each block */ @@ -4210,6 +4413,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) if (bb->visited == true) continue; labelList[i].operands[0] = bb->startOffset; + bb->blockLabelLIR = (LIR *) &labelList[i]; if (bb->blockType >= kChainingCellGap) { if (bb->isFallThroughFromInvoke == true) { @@ -4227,7 +4431,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) labelList[i].opcode = kArmPseudoEntryBlock; if (bb->firstMIRInsn == NULL) { continue; - } else { + } else if(cUnit->hasHoistedChecks) { setupLoopEntryBlock(cUnit, bb, &labelList[bb->fallThrough->id]); } @@ -4247,7 +4451,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) labelList[i].opcode = kArmPseudoChainingCellNormal; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellNormal], i); + &(cUnit->chainingListByType[kChainingCellNormal]), i); break; case kChainingCellInvokeSingleton: labelList[i].opcode = @@ -4256,7 +4460,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) (int) bb->containingMethod; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellInvokeSingleton], i); + &(cUnit->chainingListByType[kChainingCellInvokeSingleton]), i); break; case kChainingCellInvokePredicted: labelList[i].opcode = @@ -4270,14 +4474,14 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) labelList[i].operands[0] = labelList[i].operands[1]; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellInvokePredicted], i); + &(cUnit->chainingListByType[kChainingCellInvokePredicted]), i); break; case kChainingCellHot: labelList[i].opcode = kArmPseudoChainingCellHot; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellHot], i); + &(cUnit->chainingListByType[kChainingCellHot]), i); break; case kPCReconstruction: /* Make sure exception handling block is next */ @@ -4300,7 +4504,7 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) kArmPseudoChainingCellBackwardBranch; /* handle the codegen later */ dvmInsertGrowableList( - &chainingListByType[kChainingCellBackwardBranch], + &(cUnit->chainingListByType[kChainingCellBackwardBranch]), i); break; default: @@ -4392,6 +4596,8 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) if (singleStepMe || cUnit->allSingleStep) { notHandled = false; 
genInterpSingleStep(cUnit, mir); + } else if (isInvalidMIR(cUnit, mir)) { + notHandled = false; } else { opcodeCoverage[dalvikOpcode]++; switch (dalvikFormat) { @@ -4486,9 +4692,11 @@ void dvmCompilerMIR2LIR(CompilationUnit *cUnit) break; } } + dvmCompilerCheckBlockStats(cUnit,bb); } - if (bb->blockType == kEntryBlock) { + if (bb->blockType == kEntryBlock && + cUnit->hasHoistedChecks) { dvmCompilerAppendLIR(cUnit, (LIR *) cUnit->loopAnalysis->branchToBody); dvmCompilerAppendLIR(cUnit, @@ -4518,11 +4726,12 @@ gen_fallthrough: } /* Handle the chaining cells in predefined order */ + cUnit->chainingCellExtraSize=0; for (i = 0; i < kChainingCellGap; i++) { size_t j; - int *blockIdList = (int *) chainingListByType[i].elemList; + int *blockIdList = (int *) (cUnit->chainingListByType[i].elemList); - cUnit->numChainingCells[i] = chainingListByType[i].numUsed; + cUnit->numChainingCells[i] = cUnit->chainingListByType[i].numUsed; /* No chaining cells of this type */ if (cUnit->numChainingCells[i] == 0) @@ -4531,7 +4740,7 @@ gen_fallthrough: /* Record the first LIR for a new type of chaining cell */ cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]]; - for (j = 0; j < chainingListByType[i].numUsed; j++) { + for (j = 0; j < cUnit->chainingListByType[i].numUsed; j++) { int blockId = blockIdList[j]; BasicBlock *chainingBlock = (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, @@ -4543,7 +4752,6 @@ gen_fallthrough: /* Insert the pseudo chaining instruction */ dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]); - switch (chainingBlock->blockType) { case kChainingCellNormal: handleNormalChainingCell(cUnit, chainingBlock->startOffset); @@ -4588,6 +4796,8 @@ gen_fallthrough: opReg(cUnit, kOpBlx, r2); } + dvmCompilerCheckStats(cUnit); + dvmCompilerApplyGlobalOptimizations(cUnit); #if defined(WITH_SELF_VERIFICATION) @@ -4694,9 +4904,9 @@ bool dvmCompilerArchInit() int i; for (i = 0; i < kArmLast; i++) { - if (EncodingMap[i].opcode != i) { - ALOGE("Encoding order for %s is wrong: expecting %d, seeing %d", - EncodingMap[i].name, i, EncodingMap[i].opcode); + if (getEncoding((ArmOpcode)i)->opcode != i) { + ALOGE("Encoding order for %s is wrong: expecting %d, seeing %d", + getEncoding((ArmOpcode)i)->name, i, getEncoding((ArmOpcode)i)->opcode); dvmAbort(); // OK to dvmAbort - build error } } @@ -4751,3 +4961,63 @@ void dvmCompilerFlushRegWideImpl(CompilationUnit *cUnit, int rBase, { storeBaseDispWide(cUnit, rBase, displacement, rSrcLo, rSrcHi); } + +LocalOptsFuncMap localOptsFunMap = { + + handleEasyDivide, + handleEasyMultiply, + handleExecuteInline, + handleExtendedMIR, + insertChainingSwitch, + isPopCountLE2, + isPowerOfTwo, + lowestSetBit, + markCard, + setupLoopEntryBlock, + genInterpSingleStep, + setMemRefType, + annotateDalvikRegAccess, + setupResourceMasks, + newLIR0, + newLIR1, + newLIR2, + newLIR3, +#if defined(_ARMV7_A) || defined(_ARMV7_A_NEON) + newLIR4, +#endif + inlinedTarget, + genCheckCommon, + loadWordDisp, + storeWordDisp, + loadValueDirect, + loadValueDirectFixed, + loadValueDirectWide, + loadValueDirectWideFixed, + loadValue, + storeValue, + loadValueWide, + genNullCheck, + genRegRegCheck, + genZeroCheck, + genBoundsCheck, + loadConstantNoClobber, + loadConstant, + storeValueWide, + genSuspendPoll, + storeBaseDispWide, + storeBaseDisp, + loadBaseDispWide, + opRegRegImm, + opRegRegReg, + loadBaseIndexed, + storeBaseIndexed, + dvmCompilerRegClassBySize, + encodeShift, + opRegReg, + opCondBranch, + genIT, + genBarrier, + modifiedImmediate, + genRegImmCheck, +}; + diff 
--git a/vm/compiler/codegen/arm/FP/Thumb2VFP.cpp b/vm/compiler/codegen/arm/FP/Thumb2VFP.cpp index abbf2c9b4..750cbdc19 100644 --- a/vm/compiler/codegen/arm/FP/Thumb2VFP.cpp +++ b/vm/compiler/codegen/arm/FP/Thumb2VFP.cpp @@ -208,6 +208,12 @@ static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir) return false; } +__attribute__((weak)) bool genCmpFPThumb2(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + return true; +} + static bool genCmpFP(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2) { @@ -215,6 +221,9 @@ static bool genCmpFP(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, int defaultResult; RegLocation rlResult; + if(!genCmpFPThumb2(cUnit, mir, rlDest, rlSrc1, rlSrc2)) + return false; + switch(mir->dalvikInsn.opcode) { case OP_CMPL_FLOAT: isDouble = false; diff --git a/vm/compiler/codegen/arm/LocalOptimizations.cpp b/vm/compiler/codegen/arm/LocalOptimizations.cpp index cb35d745d..b3fb5ca0a 100644 --- a/vm/compiler/codegen/arm/LocalOptimizations.cpp +++ b/vm/compiler/codegen/arm/LocalOptimizations.cpp @@ -67,6 +67,23 @@ static void convertMemOpIntoMove(CompilationUnit *cUnit, ArmLIR *origLIR, dvmCompilerInsertLIRAfter((LIR *) origLIR, (LIR *) moveLIR); } +/* placeholder function for extra check on current lir */ +__attribute__((weak)) bool checkSpecialLIR(ArmLIR **lir) +{ + return false; +} + +__attribute__((weak)) void dumpBothLIRs(CompilationUnit *cUnit, + ArmLIR *thisLIR, ArmLIR *checkLIR) +{ + if(cUnit->printMe){ + ALOGD("thisLIR"); + dvmDumpLIRInsn((LIR*)thisLIR,0); + ALOGD("checkLIR"); + dvmDumpLIRInsn((LIR*)checkLIR,0); + } +} + /* * Perform a pass of top-down walk, from the second-last instruction in the * superblock, to eliminate redundant loads and stores. @@ -101,12 +118,13 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, /* Skip non-interesting instructions */ if ((thisLIR->flags.isNop == true) || isPseudoOpcode(thisLIR->opcode) || - !(EncodingMap[thisLIR->opcode].flags & (IS_LOAD | IS_STORE))) { + checkSpecialLIR(&thisLIR) || + !(getEncoding(thisLIR->opcode)->flags & (IS_LOAD | IS_STORE))) { continue; } int nativeRegId = thisLIR->operands[0]; - bool isThisLIRLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD; + bool isThisLIRLoad = getEncoding(thisLIR->opcode)->flags & IS_LOAD; ArmLIR *checkLIR; /* Use the mem mask to determine the rough memory location */ u8 thisMemMask = (thisLIR->useMask | thisLIR->defMask) & ENCODE_MEM; @@ -146,14 +164,14 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, * Potential aliases seen - check the alias relations */ if (checkMemMask != ENCODE_MEM && aliasCondition != 0) { - bool isCheckLIRLoad = EncodingMap[checkLIR->opcode].flags & + bool isCheckLIRLoad = getEncoding(checkLIR->opcode)->flags & IS_LOAD; if (aliasCondition == ENCODE_LITERAL) { /* * Should only see literal loads in the instruction * stream. */ - assert(!(EncodingMap[checkLIR->opcode].flags & + assert(!(getEncoding(checkLIR->opcode)->flags & IS_STORE)); /* Same value && same register type */ if (checkLIR->aliasInfo == thisLIR->aliasInfo && @@ -216,6 +234,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, * case for this so we just stop here to be * conservative. 
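checkSpecialLIR() receives an ArmLIR ** rather than a plain pointer, so a strong override can both declare a LIR off-limits to the optimizer and, if needed, repoint the scan at a substitute instruction; the weak default declines everything. One plausible override shape, keyed off the extraData field this patch adds to ArmLIR (the policy shown is illustrative only):

    /* Hypothetical strong override: any LIR carrying extraData encodes
     * vendor-specific semantics the generic load/store eliminator must
     * not reason about, so flag it as special and leave *lir unchanged. */
    bool checkSpecialLIR(ArmLIR **lir)
    {
        return (*lir)->extraData != NULL;
    }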
*/ + dumpBothLIRs(cUnit,thisLIR, checkLIR); stopHere = true; } } @@ -260,6 +279,7 @@ static void applyLoadStoreElimination(CompilationUnit *cUnit, } else if (!checkLIR->flags.isNop) { sinkDistance++; } + checkSpecialLIR(&checkLIR); } } } @@ -290,7 +310,8 @@ static void applyLoadHoisting(CompilationUnit *cUnit, /* Skip non-interesting instructions */ if ((thisLIR->flags.isNop == true) || isPseudoOpcode(thisLIR->opcode) || - !(EncodingMap[thisLIR->opcode].flags & IS_LOAD)) { + checkSpecialLIR(&thisLIR) || + !(getEncoding(thisLIR->opcode)->flags & IS_LOAD)) { continue; } @@ -324,6 +345,8 @@ static void applyLoadHoisting(CompilationUnit *cUnit, */ if (checkLIR->flags.isNop) continue; + checkSpecialLIR(&checkLIR); + u8 checkMemMask = checkLIR->defMask & ENCODE_MEM; u8 aliasCondition = stopUseAllMask & checkMemMask; stopHere = false; @@ -389,7 +412,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, ArmLIR *depLIR = prevInstList[nextSlot-1]; /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */ if (!isPseudoOpcode(depLIR->opcode) && - (EncodingMap[depLIR->opcode].flags & IS_LOAD)) { + (getEncoding(depLIR->opcode)->flags & IS_LOAD)) { firstSlot -= LDLD_DISTANCE; } /* @@ -409,7 +432,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, * If the first instruction is a load, don't hoist anything * above it since it is unlikely to be beneficial. */ - if (EncodingMap[curLIR->opcode].flags & IS_LOAD) continue; + if (getEncoding(curLIR->opcode)->flags & IS_LOAD) continue; /* * Need to unconditionally break here even if the hoisted * distance is greater than LD_LATENCY (ie more than enough @@ -429,7 +452,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, * the remaining instructions are less than LD_LATENCY. */ if (((curLIR->useMask & prevLIR->defMask) && - (EncodingMap[prevLIR->opcode].flags & IS_LOAD)) || + (getEncoding(prevLIR->opcode)->flags & IS_LOAD)) || (slot < LD_LATENCY)) { break; } @@ -453,6 +476,7 @@ static void applyLoadHoisting(CompilationUnit *cUnit, } } +#ifndef WITH_QC_PERF /* * Find all lsl/lsr and add that can be replaced with a * combined lsl/lsr + add @@ -582,6 +606,7 @@ static void applyShiftArithmeticOpts(CompilationUnit *cUnit, } } } +#endif void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR, LIR *tailLIR) @@ -593,7 +618,9 @@ void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR, if (!(gDvmJit.disableOpt & (1 << kLoadHoisting))) { applyLoadHoisting(cUnit, (ArmLIR *) headLIR, (ArmLIR *) tailLIR); } +#ifndef WITH_QC_PERF if (!(gDvmJit.disableOpt & (1 << kShiftArithmetic))) { applyShiftArithmeticOpts(cUnit, (ArmLIR *) headLIR, (ArmLIR* ) tailLIR); } +#endif } diff --git a/vm/compiler/codegen/arm/Thumb/Gen.cpp b/vm/compiler/codegen/arm/Thumb/Gen.cpp index 622f47eff..52d492c32 100644 --- a/vm/compiler/codegen/arm/Thumb/Gen.cpp +++ b/vm/compiler/codegen/arm/Thumb/Gen.cpp @@ -123,7 +123,7 @@ static bool partialOverlap(int sreg1, int sreg2) static void genLong3Addr(CompilationUnit *cUnit, MIR *mir, OpKind firstOp, OpKind secondOp, RegLocation rlDest, - RegLocation rlSrc1, RegLocation rlSrc2) + RegLocation rlSrc1, RegLocation rlSrc2, bool setCCode) { RegLocation rlResult; if (partialOverlap(rlSrc1.sRegLow,rlSrc2.sRegLow) || @@ -275,6 +275,7 @@ static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, opRegRegImm(cUnit, kOpMul, rlResult.lowReg, rlSrc.lowReg, lit); } +#ifndef WITH_QC_PERF static void genMultiplyByShiftAndReverseSubtract(CompilationUnit *cUnit, RegLocation rlSrc, RegLocation rlResult, int lit) 
{ @@ -282,3 +283,145 @@ static void genMultiplyByShiftAndReverseSubtract(CompilationUnit *cUnit, opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lit); opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg); } + +/* + * Generate array load + */ +static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlDest, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + RegLocation rlResult; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + int regPtr; + + /* null object? */ + ArmLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, + rlArray.lowReg, mir->offset, NULL); + } + + regPtr = dvmCompilerAllocTemp(cUnit); + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + /* regPtr -> array data */ + opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } else { + /* regPtr -> array data */ + opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset); + } + if ((size == kLong) || (size == kDouble)) { + if (scale) { + int rNewIndex = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale); + opRegReg(cUnit, kOpAdd, regPtr, rNewIndex); + dvmCompilerFreeTemp(cUnit, rNewIndex); + } else { + opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg); + } + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + + HEAP_ACCESS_SHADOW(true); + loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + storeValueWide(cUnit, rlDest, rlResult); + } else { + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + + HEAP_ACCESS_SHADOW(true); + loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg, + scale, size); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + storeValue(cUnit, rlDest, rlResult); + } +} + +/* + * Generate array store + * + */ +static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlSrc, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + + int regPtr; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + + if (dvmCompilerIsTemp(cUnit, rlArray.lowReg)) { + dvmCompilerClobber(cUnit, rlArray.lowReg); + regPtr = rlArray.lowReg; + } else { + regPtr = dvmCompilerAllocTemp(cUnit); + genRegCopy(cUnit, regPtr, rlArray.lowReg); + } + + /* null object? */ + ArmLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, + mir->offset, NULL); + } + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + //NOTE: max live temps(4) here. 
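+ /* In plain C, the check emitted here is (sketch, T = element type):
+  *   if ((u4)index >= (u4)array->length) goto reconstruction cell;
+  * one unsigned compare covers both index < 0 and index >= length,
+  * before the *(T *)(array + dataOffset + (index << scale)) access. */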
+ /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + /* regPtr -> array data */ + opRegImm(cUnit, kOpAdd, regPtr, dataOffset); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } else { + /* regPtr -> array data */ + opRegImm(cUnit, kOpAdd, regPtr, dataOffset); + } + /* at this point, regPtr points to array, 2 live temps */ + if ((size == kLong) || (size == kDouble)) { + //TODO: need specific wide routine that can handle fp regs + if (scale) { + int rNewIndex = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale); + opRegReg(cUnit, kOpAdd, regPtr, rNewIndex); + } else { + opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg); + } + rlSrc = loadValueWide(cUnit, rlSrc, regClass); + + HEAP_ACCESS_SHADOW(true); + storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + } else { + rlSrc = loadValue(cUnit, rlSrc, regClass); + + HEAP_ACCESS_SHADOW(true); + storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg, + scale, size); + HEAP_ACCESS_SHADOW(false); + } +} +#endif diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.cpp b/vm/compiler/codegen/arm/Thumb2/Factory.cpp index b9265e823..f1fa19d29 100644 --- a/vm/compiler/codegen/arm/Thumb2/Factory.cpp +++ b/vm/compiler/codegen/arm/Thumb2/Factory.cpp @@ -22,6 +22,9 @@ * */ +#define SET_CCODE (cUnit->setCCode = true) /* codegen changes CCode */ +#define UNSET_CCODE (cUnit->setCCode = false) /* codegen does not change CCode */ + static int coreTemps[] = {r0, r1, r2, r3, r4PC, r7, r8, r9, r10, r11, r12}; static int fpTemps[] = {fr16, fr17, fr18, fr19, fr20, fr21, fr22, fr23, fr24, fr25, fr26, fr27, fr28, fr29, fr30, fr31}; @@ -150,10 +153,6 @@ static ArmLIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest, return loadFPConstantValue(cUnit, rDest, value); } - /* See if the value can be constructed cheaply */ - if (LOWREG(rDest) && (value >= 0) && (value <= 255)) { - return newLIR2(cUnit, kThumbMovImm, rDest, value); - } /* Check Modified immediate special cases */ modImm = modifiedImmediate(value); if (modImm >= 0) { @@ -303,9 +302,19 @@ static ArmLIR *opReg(CompilationUnit *cUnit, OpKind op, int rDestSrc) return newLIR1(cUnit, opcode, rDestSrc); } +__attribute__((weak)) ArmLIR *opRegRegShiftThumb2(CompilationUnit *cUnit, OpKind op, + int rDestSrc1, int rSrc2, int shift) +{ + return NULL; +} + static ArmLIR *opRegRegShift(CompilationUnit *cUnit, OpKind op, int rDestSrc1, int rSrc2, int shift) { + ArmLIR *res; + if((res = opRegRegShiftThumb2(cUnit, op, rDestSrc1, rSrc2, shift))) + return res; + bool thumbForm = ((shift == 0) && LOWREG(rDestSrc1) && LOWREG(rSrc2)); ArmOpcode opcode = kThumbBkpt; switch (op) { @@ -353,7 +362,7 @@ static ArmLIR *opRegRegShift(CompilationUnit *cUnit, OpKind op, int rDestSrc1, opcode = (thumbForm) ? kThumbMul : kThumb2MulRRR; break; case kOpMvn: - opcode = (thumbForm) ? kThumbMvn : kThumb2MnvRR; + opcode = (thumbForm) ? 
kThumbMvn : kThumb2MvnRR; break; case kOpNeg: assert(shift == 0); @@ -404,14 +413,14 @@ static ArmLIR *opRegRegShift(CompilationUnit *cUnit, OpKind op, int rDestSrc1, break; } assert(opcode >= 0); - if (EncodingMap[opcode].flags & IS_BINARY_OP) + if (getEncoding(opcode)->flags & IS_BINARY_OP) return newLIR2(cUnit, opcode, rDestSrc1, rSrc2); - else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { - if (EncodingMap[opcode].fieldLoc[2].kind == kFmtShift) + else if (getEncoding(opcode)->flags & IS_TERTIARY_OP) { + if (getEncoding(opcode)->fieldLoc[2].kind == kFmtShift) return newLIR3(cUnit, opcode, rDestSrc1, rSrc2, shift); else return newLIR3(cUnit, opcode, rDestSrc1, rDestSrc1, rSrc2); - } else if (EncodingMap[opcode].flags & IS_QUAD_OP) + } else if (getEncoding(opcode)->flags & IS_QUAD_OP) return newLIR4(cUnit, opcode, rDestSrc1, rDestSrc1, rSrc2, shift); else { assert(0); @@ -425,9 +434,20 @@ static ArmLIR *opRegReg(CompilationUnit *cUnit, OpKind op, int rDestSrc1, return opRegRegShift(cUnit, op, rDestSrc1, rSrc2, 0); } +__attribute__((weak)) ArmLIR *opRegRegRegShiftThumb2(CompilationUnit *cUnit, OpKind op, + int rDest, int rSrc1, int rSrc2, int shift) +{ + return NULL; +} + static ArmLIR *opRegRegRegShift(CompilationUnit *cUnit, OpKind op, int rDest, int rSrc1, int rSrc2, int shift) { + ArmLIR *res; + + if((res = opRegRegRegShiftThumb2(cUnit, op, rDest, rSrc1, rSrc2, shift))) + return res; + ArmOpcode opcode = kThumbBkpt; bool thumbForm = (shift == 0) && LOWREG(rDest) && LOWREG(rSrc1) && LOWREG(rSrc2); @@ -481,10 +501,10 @@ static ArmLIR *opRegRegRegShift(CompilationUnit *cUnit, OpKind op, break; } assert(opcode >= 0); - if (EncodingMap[opcode].flags & IS_QUAD_OP) + if (getEncoding(opcode)->flags & IS_QUAD_OP) return newLIR4(cUnit, opcode, rDest, rSrc1, rSrc2, shift); else { - assert(EncodingMap[opcode].flags & IS_TERTIARY_OP); + assert(getEncoding(opcode)->flags & IS_TERTIARY_OP); return newLIR3(cUnit, opcode, rDest, rSrc1, rSrc2); } } @@ -495,10 +515,20 @@ static ArmLIR *opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest, return opRegRegRegShift(cUnit, op, rDest, rSrc1, rSrc2, 0); } +__attribute__((weak)) ArmLIR *opRegRegImmThumb2(CompilationUnit *cUnit, OpKind op, int rDest, + int rSrc1, int value) +{ + return NULL; +} + static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, int rSrc1, int value) { ArmLIR *res; + + if((res = opRegRegImmThumb2(cUnit, op, rDest, rSrc1, value))) + return res; + bool neg = (value < 0); int absValue = (neg) ? -value : value; ArmOpcode opcode = kThumbBkpt; @@ -608,7 +638,7 @@ static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, } else { int rScratch = dvmCompilerAllocTemp(cUnit); loadConstant(cUnit, rScratch, value); - if (EncodingMap[altOpcode].flags & IS_QUAD_OP) + if (getEncoding(altOpcode)->flags & IS_QUAD_OP) res = newLIR4(cUnit, altOpcode, rDest, rSrc1, rScratch, 0); else res = newLIR3(cUnit, altOpcode, rDest, rSrc1, rScratch); @@ -617,10 +647,21 @@ static ArmLIR *opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, } } +__attribute__((weak)) ArmLIR *opRegImmThumb2(CompilationUnit *cUnit, OpKind op, int rDestSrc1, + int value) +{ + return NULL; +} + /* Handle Thumb-only variants here - otherwise punt to opRegRegImm */ static ArmLIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1, int value) { + ArmLIR *res; + + if((res = opRegImmThumb2(cUnit, op, rDestSrc1, value))) + return res; + bool neg = (value < 0); int absValue = (neg) ? 
-value : value; bool shortForm = (((absValue & 0xff) == absValue) && LOWREG(rDestSrc1)); @@ -1153,11 +1194,25 @@ static void storePair(CompilationUnit *cUnit, int base, int lowReg, int highReg) storeBaseDispWide(cUnit, base, 0, lowReg, highReg); } +#ifndef WITH_QC_PERF +static void storePair(CompilationUnit *cUnit, int base, int displacement, int lowReg, int highReg) +{ + storeBaseDispWide(cUnit, base, displacement, lowReg, highReg); +} +#endif + static void loadPair(CompilationUnit *cUnit, int base, int lowReg, int highReg) { loadBaseDispWide(cUnit, NULL, base, 0, lowReg, highReg, INVALID_SREG); } +#ifndef WITH_QC_PERF +static void loadPair(CompilationUnit *cUnit, int base, int displacement, int lowReg, int highReg) +{ + loadBaseDispWide(cUnit, NULL, base, displacement, lowReg, highReg, INVALID_SREG); +} +#endif + /* * Generate a register comparison to an immediate and branch. Caller * is responsible for setting branch target field. @@ -1223,7 +1278,7 @@ static ArmLIR* genRegCopyNoInsert(CompilationUnit *cUnit, int rDest, int rSrc) return fpRegCopy(cUnit, rDest, rSrc); res = (ArmLIR *) dvmCompilerNew(sizeof(ArmLIR), true); if (LOWREG(rDest) && LOWREG(rSrc)) - opcode = kThumbMovRR; + opcode = kThumb2MovRR; else if (!LOWREG(rDest) && !LOWREG(rSrc)) opcode = kThumbMovRR_H2H; else if (LOWREG(rDest)) diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.cpp b/vm/compiler/codegen/arm/Thumb2/Gen.cpp index df37478e3..a2adc6f7e 100644 --- a/vm/compiler/codegen/arm/Thumb2/Gen.cpp +++ b/vm/compiler/codegen/arm/Thumb2/Gen.cpp @@ -124,13 +124,15 @@ static void genMulLong(CompilationUnit *cUnit, RegLocation rlDest, static void genLong3Addr(CompilationUnit *cUnit, MIR *mir, OpKind firstOp, OpKind secondOp, RegLocation rlDest, - RegLocation rlSrc1, RegLocation rlSrc2) + RegLocation rlSrc1, RegLocation rlSrc2, bool setCCode) { RegLocation rlResult; rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg); rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + if(setCCode) SET_CCODE; opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg); + if(setCCode) UNSET_CCODE; opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg, rlSrc2.highReg); storeValueWide(cUnit, rlDest, rlResult); @@ -307,9 +309,18 @@ static void genMonitorExit(CompilationUnit *cUnit, MIR *mir) // Is lock unheld on lock or held by us (==threadId) on unlock? opRegRegImm(cUnit, kOpAnd, r7, r2, (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)); +#ifdef WITH_QC_PERF + opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner +#endif newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1); +#ifndef WITH_QC_PERF opRegRegRegShift(cUnit, kOpSub, r2, r2, r3, encodeShift(kArmLsl, LW_LOCK_OWNER_SHIFT)); // Align owner +#else + SET_CCODE; + opRegReg(cUnit, kOpSub, r2, r3); + UNSET_CCODE; +#endif hopBranch = opCondBranch(cUnit, kArmCondNe); dvmCompilerGenMemBarrier(cUnit, kSY); storeWordDisp(cUnit, r1, offsetof(Object, lock), r7); @@ -351,6 +362,13 @@ static void genMonitor(CompilationUnit *cUnit, MIR *mir) genMonitorExit(cUnit, mir); } +__attribute__((weak)) bool genCmpLongThumb2(CompilationUnit *cUnit, MIR *mir, + RegLocation rlDest, RegLocation rlSrc1, + RegLocation rlSrc2) +{ + return false; +} + /* * 64-bit 3way compare function. 
* mov r7, #-1 @@ -370,6 +388,9 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2) { + if(genCmpLongThumb2(cUnit, mir, rlDest, rlSrc1, rlSrc2)) + return; + RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change ArmLIR *target1; ArmLIR *target2; @@ -380,7 +401,9 @@ static void genCmpLong(CompilationUnit *cUnit, MIR *mir, opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg); ArmLIR *branch1 = opCondBranch(cUnit, kArmCondLt); ArmLIR *branch2 = opCondBranch(cUnit, kArmCondGt); + SET_CCODE; opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg); + UNSET_CCODE; ArmLIR *branch3 = opCondBranch(cUnit, kArmCondEq); genIT(cUnit, kArmCondHi, "E"); @@ -453,9 +476,154 @@ static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, } } +#ifndef WITH_QC_PERF static void genMultiplyByShiftAndReverseSubtract(CompilationUnit *cUnit, RegLocation rlSrc, RegLocation rlResult, int lit) { newLIR4(cUnit, kThumb2RsbRRR, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg, encodeShift(kArmLsl, lit)); } + +/* + * Generate array load. + * For wide array access using scale, combine add with shift. + * When using offset, use ldr instruction with offset capabilities. + */ +static void genArrayGet(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlDest, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + RegLocation rlResult; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + int regPtr; + + /* null object? */ + ArmLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, + rlArray.lowReg, mir->offset, NULL); + } + + regPtr = dvmCompilerAllocTemp(cUnit); + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } + if ((size == kLong) || (size == kDouble)) { + int rNewIndex = dvmCompilerAllocTemp(cUnit); + if (scale) { + /* Combine add with shift */ + opRegRegRegShift(cUnit, kOpAdd, rNewIndex, rlArray.lowReg, + rlIndex.lowReg, encodeShift(kArmLsl, scale)); + } else { + opRegRegReg(cUnit, kOpAdd, rNewIndex, regPtr, rlIndex.lowReg); + } + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + + HEAP_ACCESS_SHADOW(true); + /* Use data offset */ + loadPair(cUnit, rNewIndex, dataOffset, rlResult.lowReg, rlResult.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, rNewIndex); + dvmCompilerFreeTemp(cUnit, regPtr); + storeValueWide(cUnit, rlDest, rlResult); + } else { + /* regPtr -> array data */ + opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset); + + rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true); + + HEAP_ACCESS_SHADOW(true); + loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg, + scale, size); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, regPtr); + storeValue(cUnit, rlDest, rlResult); + } +} + +/* + * Generate array store. + * For wide array access using scale, combine add with shift. + * When using offset, use str instruction with offset capabilities. 
+ */ +static void genArrayPut(CompilationUnit *cUnit, MIR *mir, OpSize size, + RegLocation rlArray, RegLocation rlIndex, + RegLocation rlSrc, int scale) +{ + RegisterClass regClass = dvmCompilerRegClassBySize(size); + int lenOffset = OFFSETOF_MEMBER(ArrayObject, length); + int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents); + + int regPtr; + rlArray = loadValue(cUnit, rlArray, kCoreReg); + rlIndex = loadValue(cUnit, rlIndex, kCoreReg); + + if (dvmCompilerIsTemp(cUnit, rlArray.lowReg)) { + dvmCompilerClobber(cUnit, rlArray.lowReg); + regPtr = rlArray.lowReg; + } else { + regPtr = dvmCompilerAllocTemp(cUnit); + genRegCopy(cUnit, regPtr, rlArray.lowReg); + } + + /* null object? */ + ArmLIR * pcrLabel = NULL; + + if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) { + pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, + mir->offset, NULL); + } + + if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) { + int regLen = dvmCompilerAllocTemp(cUnit); + //NOTE: max live temps(4) here. + /* Get len */ + loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen); + genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset, + pcrLabel); + dvmCompilerFreeTemp(cUnit, regLen); + } + /* at this point, regPtr points to array, 2 live temps */ + if ((size == kLong) || (size == kDouble)) { + //TODO: need specific wide routine that can handle fp regs + int rNewIndex = dvmCompilerAllocTemp(cUnit); + if (scale) { + opRegRegRegShift(cUnit, kOpAdd, rNewIndex, rlArray.lowReg, + rlIndex.lowReg, encodeShift(kArmLsl, scale)); + } else { + opRegRegReg(cUnit, kOpAdd, rNewIndex, regPtr, rlIndex.lowReg); + } + rlSrc = loadValueWide(cUnit, rlSrc, regClass); + + HEAP_ACCESS_SHADOW(true); + storePair(cUnit, rNewIndex, dataOffset, rlSrc.lowReg, rlSrc.highReg); + HEAP_ACCESS_SHADOW(false); + + dvmCompilerFreeTemp(cUnit, rNewIndex); + dvmCompilerFreeTemp(cUnit, regPtr); + } else { + /* regPtr -> array data */ + opRegImm(cUnit, kOpAdd, regPtr, dataOffset); + + rlSrc = loadValue(cUnit, rlSrc, regClass); + + HEAP_ACCESS_SHADOW(true); + storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg, + scale, size); + HEAP_ACCESS_SHADOW(false); + } +} +#endif diff --git a/vm/mterp/armv5te/OP_EXECUTE_INLINE.S b/vm/mterp/armv5te/OP_EXECUTE_INLINE.S index ca71de198..7a268dc21 100644 --- a/vm/mterp/armv5te/OP_EXECUTE_INLINE.S +++ b/vm/mterp/armv5te/OP_EXECUTE_INLINE.S @@ -46,6 +46,35 @@ * interleave a little better. Increases code size. 
*/ .L${opcode}_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 5-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .L${opcode}_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .L${opcode}_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.L${opcode}_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -63,6 +92,7 @@ 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. diff --git a/vm/mterp/armv5te/OP_EXECUTE_INLINE_RANGE.S b/vm/mterp/armv5te/OP_EXECUTE_INLINE_RANGE.S index d9e35b85f..52951812e 100644 --- a/vm/mterp/armv5te/OP_EXECUTE_INLINE_RANGE.S +++ b/vm/mterp/armv5te/OP_EXECUTE_INLINE_RANGE.S @@ -19,11 +19,11 @@ bne .L${opcode}_debugmode @ yes - take slow path .L${opcode}_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .L${opcode}_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -38,10 +38,17 @@ * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .L${opcode}_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 7-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .L${opcode}_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .L${opcode}_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .L${opcode}_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -56,6 +63,43 @@ ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.L${opcode}_load_arg6: + add ip, r9, #6 @ base+6 + GET_VREG(r3, ip) @ r3<- vBase[6] + str r3, [sp, #12] + b 7b + +.L${opcode}_load_arg5: + add ip, r9, #5 @ base+5 + GET_VREG(r3, ip) @ r3<- vBase[5] + str r3, [sp, #8] + b 6b + +.L${opcode}_load_arg4: + add ip, r9, #4 @ base+4 + GET_VREG(r3, ip) @ r3<- vBase[4] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<-
vBase[0] +0: + ldr r9, .L${opcode}_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. diff --git a/vm/mterp/c/OP_EXECUTE_INLINE.cpp b/vm/mterp/c/OP_EXECUTE_INLINE.cpp index 288ccc906..4655ae89b 100644 --- a/vm/mterp/c/OP_EXECUTE_INLINE.cpp +++ b/vm/mterp/c/OP_EXECUTE_INLINE.cpp @@ -1,5 +1,59 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4; + arg0 = arg1 = arg2 = arg3 = arg4 = 0; + + EXPORT_PC(); + + vsrc1 = INST_B(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* 0-4 register indices */ + ILOGV("|execute-inline args=%d @%d {regs=0x%04x}", + vsrc1, ref, vdst); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 5); + + switch (vsrc1) { + case 5: + arg4 = GET_REGISTER(INST_A(inst)); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst >> 12); + /* fall through */ + case 3: + arg2 = GET_REGISTER((vdst & 0x0f00) >> 8); + /* fall through */ + case 2: + arg1 = GET_REGISTER((vdst & 0x00f0) >> 4); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst & 0x0f); + /* fall through */ + default: // case 0 + ; + } + + if( vsrc1 == 5 ) { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp5Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp5Std(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } + } else { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp4Dbg(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } + } + +#else //ifdef INLINE_ARG_EXPANDED /* * This has the same form as other method calls, but we ignore * the 5th argument (vA). 
This is chiefly because the first four @@ -54,6 +108,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/mterp/c/OP_EXECUTE_INLINE_RANGE.cpp b/vm/mterp/c/OP_EXECUTE_INLINE_RANGE.cpp index 467f0e90e..48891d1bb 100644 --- a/vm/mterp/c/OP_EXECUTE_INLINE_RANGE.cpp +++ b/vm/mterp/c/OP_EXECUTE_INLINE_RANGE.cpp @@ -1,5 +1,56 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4, arg5, arg6; + arg0 = arg1 = arg2 = arg3 = arg4 = arg5 = 0; /* placate gcc */ + arg6 = 0; + + + EXPORT_PC(); + + vsrc1 = INST_AA(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* range base */ + ALOGE("|execute-inline-range args=%d @%d {regs=v%d-v%d}", + vsrc1, ref, vdst, vdst+vsrc1-1); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 7); + + switch (vsrc1) { + case 7: + arg6 = GET_REGISTER(vdst+6); + /* fall through */ + case 6: + arg5 = GET_REGISTER(vdst+5); + /* fall through */ + case 5: + arg4 = GET_REGISTER(vdst+4); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst+3); + /* fall through */ + case 3: + arg2 = GET_REGISTER(vdst+2); + /* fall through */ + case 2: + arg1 = GET_REGISTER(vdst+1); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst+0); + /* fall through */ + default: // case 0 + ; + } + + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp7Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp7Std(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } +#else //ifdef INLINE_ARG_EXPANDED u4 arg0, arg1, arg2, arg3; arg0 = arg1 = arg2 = arg3 = 0; /* placate gcc */ @@ -38,6 +89,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/mterp/common/asm-constants.h b/vm/mterp/common/asm-constants.h index 80b36fc04..406ee78cf 100644 --- a/vm/mterp/common/asm-constants.h +++ b/vm/mterp/common/asm-constants.h @@ -211,6 +211,8 @@ MTERP_OFFSET(offObject_lock, Object, lock, 4) /* Lock shape */ MTERP_CONSTANT(LW_LOCK_OWNER_SHIFT, 3) MTERP_CONSTANT(LW_HASH_STATE_SHIFT, 1) +MTERP_CONSTANT(LW_HASH_STATE_SIZE, 2) +MTERP_CONSTANT(LW_HASH_STATE_ABS_MASK, 0x6) /* ArrayObject fields */ MTERP_OFFSET(offArrayObject_length, ArrayObject, length, 8) diff --git a/vm/mterp/out/InterpAsm-armv5te-vfp.S b/vm/mterp/out/InterpAsm-armv5te-vfp.S index a173c7226..c9ee0c237 100644 --- a/vm/mterp/out/InterpAsm-armv5te-vfp.S +++ b/vm/mterp/out/InterpAsm-armv5te-vfp.S @@ -7342,11 +7342,11 @@ dalvik_inst: bne .LOP_EXECUTE_INLINE_RANGE_debugmode @ yes - take slow path .LOP_EXECUTE_INLINE_RANGE_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .LOP_EXECUTE_INLINE_RANGE_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception 
FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -9516,6 +9516,35 @@ d2l_doconv: * interleave a little better. Increases code size. */ .LOP_EXECUTE_INLINE_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 4-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .LOP_EXECUTE_INLINE_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.LOP_EXECUTE_INLINE_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -9533,6 +9562,7 @@ d2l_doconv: 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. @@ -9577,10 +9607,17 @@ d2l_doconv: * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .LOP_EXECUTE_INLINE_RANGE_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 4-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .LOP_EXECUTE_INLINE_RANGE_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .LOP_EXECUTE_INLINE_RANGE_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_RANGE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -9595,6 +9632,43 @@ d2l_doconv: ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.LOP_EXECUTE_INLINE_RANGE_load_arg6: + add ip, r9, #6 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #12] + b 7b + +.LOP_EXECUTE_INLINE_RANGE_load_arg5: + add ip, r9, #5 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #8] + b 6b + +.LOP_EXECUTE_INLINE_RANGE_load_arg4: + add ip, r9, #4 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<- vBase[0] +0: + ldr r9, .LOP_EXECUTE_INLINE_RANGE_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. 
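The "sub sp, sp, #8" to "sub sp, sp, #16" changes above follow from the ARM AAPCS: only the first four arguments travel in r0-r3, so &self->retval and every argument past the fourth must be spilled to the stack, and sp has to stay 8-byte aligned at the call. A plausible C-level shape of the widened handler entry point, with the stack slots the new .L..._load_argN stubs fill in (a sketch only; the exact extended signature is an assumption, not quoted from the headers):

    /* Hypothetical extended inline-op handler (sketch, not verbatim).
     * Under AAPCS on 32-bit ARM: arg0..arg3 -> r0..r3, the rest on the stack:
     *   [sp]      pResult  (stored by "str r1, [sp]" in _resume)
     *   [sp,#4]   arg4     (stored by .L..._load_arg4)
     *   [sp,#8]   arg5     (stored by .L..._load_arg5)
     *   [sp,#12]  arg6     (stored by .L..._load_arg6)
     * 16 bytes of outgoing arguments, which also preserves 8-byte alignment. */
    bool inlineOp7(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult,
                   u4 arg4, u4 arg5, u4 arg6);

The portable C interpreter reaches the same functionality through the dvmPerformInlineOp5Std/Dbg and dvmPerformInlineOp7Std/Dbg wrappers, which additionally carry the inline-op index "ref" as a parameter.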
diff --git a/vm/mterp/out/InterpAsm-armv5te.S b/vm/mterp/out/InterpAsm-armv5te.S index 7b6c9d18f..5ba889d7c 100644 --- a/vm/mterp/out/InterpAsm-armv5te.S +++ b/vm/mterp/out/InterpAsm-armv5te.S @@ -7664,11 +7664,11 @@ d2i_doconv: bne .LOP_EXECUTE_INLINE_RANGE_debugmode @ yes - take slow path .LOP_EXECUTE_INLINE_RANGE_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .LOP_EXECUTE_INLINE_RANGE_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -9974,6 +9974,35 @@ d2l_doconv: * interleave a little better. Increases code size. */ .LOP_EXECUTE_INLINE_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 4-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .LOP_EXECUTE_INLINE_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.LOP_EXECUTE_INLINE_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -9991,6 +10020,7 @@ d2l_doconv: 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. 
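For readers more comfortable in C, the INLINE_ARG_EXPANDED ladder above is a computed goto: the rsb/add-pc pair jumps into a table of 8-byte slots so that exactly argc loads execute, highest-numbered argument first. A minimal C++ rendering using the GNU labels-as-values extension (g++/clang); all names here are illustrative, not Dalvik's:

    #include <cstdint>

    // vreg stands in for the Dalvik frame (rFP); fedc is the FEDC code unit.
    static void loadArgsDemo(const uint32_t* vreg, uint16_t fedc,
                             unsigned vG, int argc, uint32_t args[5]) {
        static void* const slot[6] = { &&a0, &&a1, &&a2, &&a3, &&a4, &&a5 };
        goto *slot[argc];                     // asm: add pc, pc, r0, lsl #3
    a5: args[4] = vreg[vG & 0xf];             // 5th argument (vG), fall through
    a4: args[3] = vreg[(fedc >> 12) & 0xf];   // vF
    a3: args[2] = vreg[(fedc >> 8) & 0xf];    // vE
    a2: args[1] = vreg[(fedc >> 4) & 0xf];    // vD
    a1: args[0] = vreg[fedc & 0xf];           // vC
    a0: ;                                     // zero arguments: nothing to load
    }

Unlike this sketch, the assembly cannot simply fall through for the fifth argument: vG lives in a different code unit than FEDC, and it goes to the stack rather than a register. That is why the widened ladders detour through the .L..._load_argN stubs and then branch back into the original four-register sequence.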
@@ -10035,10 +10065,17 @@ d2l_doconv: * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .LOP_EXECUTE_INLINE_RANGE_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 4-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .LOP_EXECUTE_INLINE_RANGE_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .LOP_EXECUTE_INLINE_RANGE_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_RANGE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -10053,6 +10090,43 @@ d2l_doconv: ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.LOP_EXECUTE_INLINE_RANGE_load_arg6: + add ip, r9, #6 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #12] + b 7b + +.LOP_EXECUTE_INLINE_RANGE_load_arg5: + add ip, r9, #5 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #8] + b 6b + +.LOP_EXECUTE_INLINE_RANGE_load_arg4: + add ip, r9, #4 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<- vBase[0] +0: + ldr r9, .LOP_EXECUTE_INLINE_RANGE_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. diff --git a/vm/mterp/out/InterpAsm-armv7-a-neon.S b/vm/mterp/out/InterpAsm-armv7-a-neon.S index c3419c230..7d3b08f46 100644 --- a/vm/mterp/out/InterpAsm-armv7-a-neon.S +++ b/vm/mterp/out/InterpAsm-armv7-a-neon.S @@ -7300,11 +7300,11 @@ dalvik_inst: bne .LOP_EXECUTE_INLINE_RANGE_debugmode @ yes - take slow path .LOP_EXECUTE_INLINE_RANGE_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .LOP_EXECUTE_INLINE_RANGE_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -9453,6 +9453,35 @@ d2l_doconv: * interleave a little better. Increases code size. 
*/ .LOP_EXECUTE_INLINE_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 4-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .LOP_EXECUTE_INLINE_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.LOP_EXECUTE_INLINE_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -9470,6 +9499,7 @@ d2l_doconv: 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. @@ -9514,10 +9544,17 @@ d2l_doconv: * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .LOP_EXECUTE_INLINE_RANGE_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 4-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .LOP_EXECUTE_INLINE_RANGE_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .LOP_EXECUTE_INLINE_RANGE_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_RANGE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -9532,6 +9569,43 @@ d2l_doconv: ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.LOP_EXECUTE_INLINE_RANGE_load_arg6: + add ip, r9, #6 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #12] + b 7b + +.LOP_EXECUTE_INLINE_RANGE_load_arg5: + add ip, r9, #5 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #8] + b 6b + +.LOP_EXECUTE_INLINE_RANGE_load_arg4: + add ip, r9, #4 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<- vBase[0] +0: + ldr r9, .LOP_EXECUTE_INLINE_RANGE_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. 
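The "ldr pc, [..., r10, lsl #4]" dispatch works because the intrinsics table scales by 16: each entry is four 32-bit words with the code pointer first, matching the InlineOperation record from vm/InlineNative.h (reproduced here from memory as a reference sketch; verify against the header):

    #include <cstdint>
    typedef uint32_t u4;
    union JValue;   // Dalvik's polymorphic return slot

    // One entry per recognized intrinsic. On 32-bit ARM the four pointers
    // make each entry 16 bytes ("sizeof=16"), and func sits at offset 0,
    // so base + (index << 4) dereferences straight to the code pointer.
    struct InlineOperation {
        bool (*func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult);
        const char* classDescriptor;
        const char* methodName;
        const char* methodSignature;
    };

    // In C terms the dispatch amounts to:
    //   gDvmInlineOpsTable[ref].func(arg0, arg1, arg2, arg3, &retval);

r10 holds the inline-op index ("ref") on entry to these continue stubs.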
diff --git a/vm/mterp/out/InterpAsm-armv7-a.S b/vm/mterp/out/InterpAsm-armv7-a.S index 254224566..199d59acb 100644 --- a/vm/mterp/out/InterpAsm-armv7-a.S +++ b/vm/mterp/out/InterpAsm-armv7-a.S @@ -7300,11 +7300,11 @@ dalvik_inst: bne .LOP_EXECUTE_INLINE_RANGE_debugmode @ yes - take slow path .LOP_EXECUTE_INLINE_RANGE_resume: add r1, rSELF, #offThread_retval @ r1<- &self->retval - sub sp, sp, #8 @ make room for arg, +64 bit align + sub sp, sp, #16 @ make room for arg, +64 bit align mov r0, rINST, lsr #8 @ r0<- AA str r1, [sp] @ push &self->retval bl .LOP_EXECUTE_INLINE_RANGE_continue @ make call; will return after - add sp, sp, #8 @ pop stack + add sp, sp, #16 @ pop stack cmp r0, #0 @ test boolean result of inline beq common_exceptionThrown @ returned false, handle exception FETCH_ADVANCE_INST(3) @ advance rPC, load rINST @@ -9453,6 +9453,35 @@ d2l_doconv: * interleave a little better. Increases code size. */ .LOP_EXECUTE_INLINE_continue: +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #5 @ r0<- 4-r0 + FETCH(rINST, 2) @ rINST<- FEDC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) +4: and ip, rINST, #0xf000 @ isolate F + ldr r3, [rFP, ip, lsr #10] @ r3<- vF (shift right 12, left 2) +3: and ip, rINST, #0x0f00 @ isolate E + ldr r2, [rFP, ip, lsr #6] @ r2<- vE +2: and ip, rINST, #0x00f0 @ isolate D + ldr r1, [rFP, ip, lsr #2] @ r1<- vD +1: and ip, rINST, #0x000f @ isolate C + ldr r0, [rFP, ip, lsl #2] @ r0<- vC +0: + ldr rINST, .LOP_EXECUTE_INLINE_table @ table of InlineOperation +5: add rINST, pc + ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) + +.LOP_EXECUTE_INLINE_load_arg4: + FETCH(r1, 0) @ r1<- original rINST + mov r0, r1, lsr #8 + and ip, r0, #0x000f + ldr r0, [rFP, ip, lsl #2] @ r0<- vG + str r0, [sp, #4] + b 4b +#else rsb r0, r0, #4 @ r0<- 4-r0 FETCH(rINST, 2) @ rINST<- FEDC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each @@ -9470,6 +9499,7 @@ d2l_doconv: 5: add rINST, pc ldr pc, [rINST, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +#endif /* * We're debugging or profiling. 
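A note on the slot arithmetic these hunks rely on: reading pc on ARM yields the address of the current instruction plus 8, so "add pc, pc, r0, lsl #3" lands r0 slots past the instruction that follows the never-executed "bl common_abort", and the interleaved bl lines pad every branch out to a full two-instruction slot while trapping any out-of-range index. In C terms (a sketch; maxArgs is 5 for execute-inline and 7 for the range form, so the "@ r0<- 4-r0" comments inherited from the four-argument ladder understate the new rsb immediates):

    #include <cstdint>

    // Address selected by "add pc, pc, r0, lsl #3" with r0 = maxArgs - argc.
    uintptr_t computedGotoTarget(uintptr_t addPcAddr, int argc, int maxArgs) {
        int slotIndex = maxArgs - argc;               // asm: rsb r0, r0, #maxArgs
        uintptr_t firstSlot = addPcAddr + 8;          // pc reads as insn + 8
        return firstSlot + (uintptr_t)slotIndex * 8;  // 8 bytes (2 insns) per slot
    }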
@@ -9514,10 +9544,17 @@ d2l_doconv: * lr = return addr, above [DO NOT bl out of here w/o preserving LR] */ .LOP_EXECUTE_INLINE_RANGE_continue: - rsb r0, r0, #4 @ r0<- 4-r0 +#ifdef INLINE_ARG_EXPANDED + rsb r0, r0, #7 @ r0<- 4-r0 FETCH(r9, 2) @ r9<- CCCC add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each bl common_abort @ (skipped due to ARM prefetch) +8: b .LOP_EXECUTE_INLINE_RANGE_load_arg6 + bl common_abort @ (skipped due to ARM prefetch) +7: b .LOP_EXECUTE_INLINE_RANGE_load_arg5 + bl common_abort @ (skipped due to ARM prefetch) +6: b .LOP_EXECUTE_INLINE_RANGE_load_arg4 + bl common_abort @ (skipped due to ARM prefetch) 4: add ip, r9, #3 @ base+3 GET_VREG(r3, ip) @ r3<- vBase[3] 3: add ip, r9, #2 @ base+2 @@ -9532,6 +9569,43 @@ d2l_doconv: ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry @ (not reached) +.LOP_EXECUTE_INLINE_RANGE_load_arg6: + add ip, r9, #6 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #12] + b 7b + +.LOP_EXECUTE_INLINE_RANGE_load_arg5: + add ip, r9, #5 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #8] + b 6b + +.LOP_EXECUTE_INLINE_RANGE_load_arg4: + add ip, r9, #4 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] + str r3, [sp, #4] + b 4b + +#else + rsb r0, r0, #4 @ r0<- 4-r0 + FETCH(r9, 2) @ r9<- CCCC + add pc, pc, r0, lsl #3 @ computed goto, 2 instrs each + bl common_abort @ (skipped due to ARM prefetch) +4: add ip, r9, #3 @ base+3 + GET_VREG(r3, ip) @ r3<- vBase[3] +3: add ip, r9, #2 @ base+2 + GET_VREG(r2, ip) @ r2<- vBase[2] +2: add ip, r9, #1 @ base+1 + GET_VREG(r1, ip) @ r1<- vBase[1] +1: add ip, r9, #0 @ (nop) + GET_VREG(r0, ip) @ r0<- vBase[0] +0: + ldr r9, .LOP_EXECUTE_INLINE_RANGE_table @ table of InlineOperation +5: add r9, pc + ldr pc, [r9, r10, lsl #4] @ sizeof=16, "func" is first entry + @ (not reached) +#endif /* * We're debugging or profiling. 
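In the C interpreters that follow, the non-range form's fifth argument comes out of the opcode word itself: the argument count rides in the top nibble (read by INST_B) and vG in bits 11-8 (read by INST_A), which is why the new case 5 reads GET_REGISTER(INST_A(inst)). The mterp accessors behave like this (an equivalent sketch, not the verbatim macros):

    #include <cstdint>

    // Opcode code unit for execute-inline: count and vG nibbles, then opcode byte.
    static inline unsigned instA(uint16_t inst) { return (inst >> 8) & 0x0f; } // vG
    static inline unsigned instB(uint16_t inst) { return inst >> 12; }         // #args

One detail worth noting: the expanded range path traces with ALOGE, which logs unconditionally at error severity, whereas the surrounding interpreter code uses the verbose-only ILOGV for the same trace line.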
diff --git a/vm/mterp/out/InterpC-allstubs.cpp b/vm/mterp/out/InterpC-allstubs.cpp index 1ef878370..5258cbcee 100644 --- a/vm/mterp/out/InterpC-allstubs.cpp +++ b/vm/mterp/out/InterpC-allstubs.cpp @@ -2810,6 +2810,60 @@ OP_END /* File: c/OP_EXECUTE_INLINE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4; + arg0 = arg1 = arg2 = arg3 = arg4 = 0; + + EXPORT_PC(); + + vsrc1 = INST_B(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* 0-4 register indices */ + ILOGV("|execute-inline args=%d @%d {regs=0x%04x}", + vsrc1, ref, vdst); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 5); + + switch (vsrc1) { + case 5: + arg4 = GET_REGISTER(INST_A(inst)); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst >> 12); + /* fall through */ + case 3: + arg2 = GET_REGISTER((vdst & 0x0f00) >> 8); + /* fall through */ + case 2: + arg1 = GET_REGISTER((vdst & 0x00f0) >> 4); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst & 0x0f); + /* fall through */ + default: // case 0 + ; + } + + if( vsrc1 == 5 ) { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp5Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp5Std(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } + } else { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp4Dbg(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } + } + +#else //ifdef INLINE_ARG_EXPANDED /* * This has the same form as other method calls, but we ignore * the 5th argument (vA). 
This is chiefly because the first four @@ -2864,6 +2918,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END @@ -2871,6 +2926,57 @@ OP_END /* File: c/OP_EXECUTE_INLINE_RANGE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4, arg5, arg6; + arg0 = arg1 = arg2 = arg3 = arg4 = arg5 = 0; /* placate gcc */ + arg6 = 0; + + + EXPORT_PC(); + + vsrc1 = INST_AA(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* range base */ + ALOGE("|execute-inline-range args=%d @%d {regs=v%d-v%d}", + vsrc1, ref, vdst, vdst+vsrc1-1); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 7); + + switch (vsrc1) { + case 7: + arg6 = GET_REGISTER(vdst+6); + /* fall through */ + case 6: + arg5 = GET_REGISTER(vdst+5); + /* fall through */ + case 5: + arg4 = GET_REGISTER(vdst+4); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst+3); + /* fall through */ + case 3: + arg2 = GET_REGISTER(vdst+2); + /* fall through */ + case 2: + arg1 = GET_REGISTER(vdst+1); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst+0); + /* fall through */ + default: // case 0 + ; + } + + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp7Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp7Std(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } +#else //ifdef INLINE_ARG_EXPANDED u4 arg0, arg1, arg2, arg3; arg0 = arg1 = arg2 = arg3 = 0; /* placate gcc */ @@ -2909,6 +3015,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/mterp/out/InterpC-portable.cpp b/vm/mterp/out/InterpC-portable.cpp index 0328aa883..ee02aa1e8 100644 --- a/vm/mterp/out/InterpC-portable.cpp +++ b/vm/mterp/out/InterpC-portable.cpp @@ -2821,6 +2821,60 @@ OP_END /* File: c/OP_EXECUTE_INLINE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4; + arg0 = arg1 = arg2 = arg3 = arg4 = 0; + + EXPORT_PC(); + + vsrc1 = INST_B(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* 0-4 register indices */ + ILOGV("|execute-inline args=%d @%d {regs=0x%04x}", + vsrc1, ref, vdst); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 5); + + switch (vsrc1) { + case 5: + arg4 = GET_REGISTER(INST_A(inst)); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst >> 12); + /* fall through */ + case 3: + arg2 = GET_REGISTER((vdst & 0x0f00) >> 8); + /* fall through */ + case 2: + arg1 = GET_REGISTER((vdst & 0x00f0) >> 4); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst & 0x0f); + /* fall through */ + default: // case 0 + ; + } + + if( vsrc1 == 5 ) { + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp5Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp5Std(arg0, arg1, arg2, arg3, &retval, ref, arg4)) + GOTO_exceptionThrown(); + } + } else { + if 
(self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp4Dbg(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) + GOTO_exceptionThrown(); + } + } + +#else //ifdef INLINE_ARG_EXPANDED /* * This has the same form as other method calls, but we ignore * the 5th argument (vA). This is chiefly because the first four @@ -2875,6 +2929,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE /*vB, {vD, vE, vF, vG}, inline@CCCC*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END @@ -2882,6 +2937,57 @@ OP_END /* File: c/OP_EXECUTE_INLINE_RANGE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4, arg5, arg6; + arg0 = arg1 = arg2 = arg3 = arg4 = arg5 = 0; /* placate gcc */ + arg6 = 0; + + + EXPORT_PC(); + + vsrc1 = INST_AA(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* range base */ + ALOGE("|execute-inline-range args=%d @%d {regs=v%d-v%d}", + vsrc1, ref, vdst, vdst+vsrc1-1); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 7); + + switch (vsrc1) { + case 7: + arg6 = GET_REGISTER(vdst+6); + /* fall through */ + case 6: + arg5 = GET_REGISTER(vdst+5); + /* fall through */ + case 5: + arg4 = GET_REGISTER(vdst+4); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst+3); + /* fall through */ + case 3: + arg2 = GET_REGISTER(vdst+2); + /* fall through */ + case 2: + arg1 = GET_REGISTER(vdst+1); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst+0); + /* fall through */ + default: // case 0 + ; + } + + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp7Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp7Std(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } +#else //ifdef INLINE_ARG_EXPANDED u4 arg0, arg1, arg2, arg3; arg0 = arg1 = arg2 = arg3 = 0; /* placate gcc */ @@ -2920,6 +3026,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/mterp/out/InterpC-x86.cpp b/vm/mterp/out/InterpC-x86.cpp index 77dc8885c..eb8a1e9e4 100644 --- a/vm/mterp/out/InterpC-x86.cpp +++ b/vm/mterp/out/InterpC-x86.cpp @@ -1181,6 +1181,57 @@ OP_END /* File: c/OP_EXECUTE_INLINE_RANGE.cpp */ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) { +#ifdef INLINE_ARG_EXPANDED + u4 arg0, arg1, arg2, arg3, arg4, arg5, arg6; + arg0 = arg1 = arg2 = arg3 = arg4 = arg5 = 0; /* placate gcc */ + arg6 = 0; + + + EXPORT_PC(); + + vsrc1 = INST_AA(inst); /* #of args */ + ref = FETCH(1); /* inline call "ref" */ + vdst = FETCH(2); /* range base */ + ALOGE("|execute-inline-range args=%d @%d {regs=v%d-v%d}", + vsrc1, ref, vdst, vdst+vsrc1-1); + + assert((vdst >> 16) == 0); // 16-bit type -or- high 16 bits clear + assert(vsrc1 <= 7); + + switch (vsrc1) { + case 7: + arg6 = GET_REGISTER(vdst+6); + /* fall through */ + case 6: + arg5 = GET_REGISTER(vdst+5); + /* fall through */ + case 5: + arg4 = GET_REGISTER(vdst+4); + /* fall through */ + case 4: + arg3 = GET_REGISTER(vdst+3); + /* fall through */ + case 3: + arg2 = 
GET_REGISTER(vdst+2); + /* fall through */ + case 2: + arg1 = GET_REGISTER(vdst+1); + /* fall through */ + case 1: + arg0 = GET_REGISTER(vdst+0); + /* fall through */ + default: // case 0 + ; + } + + if (self->interpBreak.ctl.subMode & kSubModeDebuggerActive) { + if (!dvmPerformInlineOp7Dbg(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } else { + if (!dvmPerformInlineOp7Std(arg0, arg1, arg2, arg3, &retval, ref, arg4, arg5, arg6)) + GOTO_exceptionThrown(); + } +#else //ifdef INLINE_ARG_EXPANDED u4 arg0, arg1, arg2, arg3; arg0 = arg1 = arg2 = arg3 = 0; /* placate gcc */ @@ -1219,6 +1270,7 @@ HANDLE_OPCODE(OP_EXECUTE_INLINE_RANGE /*{vCCCC..v(CCCC+AA-1)}, inline@BBBB*/) if (!dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)) GOTO_exceptionThrown(); } +#endif //ifdef INLINE_ARG_EXPANDED } FINISH(3); OP_END diff --git a/vm/oo/Class.cpp b/vm/oo/Class.cpp index 2a23a9f9a..a5369fb35 100644 --- a/vm/oo/Class.cpp +++ b/vm/oo/Class.cpp @@ -1845,16 +1845,23 @@ static ClassObject* loadClassFromDex0(DvmDex* pDvmDex, } if (pHeader->instanceFieldsSize != 0) { - int count = (int) pHeader->instanceFieldsSize; - u4 lastIndex = 0; - DexField field; - - newClass->ifieldCount = count; - newClass->ifields = (InstField*) dvmLinearAlloc(classLoader, - count * sizeof(InstField)); - for (i = 0; i < count; i++) { - dexReadClassDataField(&pEncodedData, &field, &lastIndex); - loadIFieldFromDex(newClass, &field, &newClass->ifields[i]); + OptClassMap* optClass = getOptClassHandler(newClass); + + if(optClass != NULL){ + optClass->handleIfield(newClass, classLoader, pHeader, &pEncodedData); + }else{ + int count = (int) pHeader->instanceFieldsSize; + u4 lastIndex = 0; + DexField field; + + newClass->ifieldCount = count; + newClass->ifields = (InstField*) dvmLinearAlloc(classLoader, + count * sizeof(InstField)); + + for (i = 0; i < count; i++) { + dexReadClassDataField(&pEncodedData, &field, &lastIndex); + loadIFieldFromDex(newClass, &field, &newClass->ifields[i]); + } } dvmLinearReadOnly(classLoader, newClass->ifields); } @@ -3914,6 +3921,9 @@ static void initSFields(ClassObject* clazz) } } +void dvmInitSFields(ClassObject* clazz){ + return initSFields(clazz); +} /* * Determine whether "descriptor" yields the same class object in the @@ -4920,3 +4930,8 @@ int dvmCompareNameDescriptorAndMethod(const char* name, return dvmCompareDescriptorAndMethodProto(descriptor, method); } + +__attribute__((weak)) OptClassMap* getOptClassHandler(ClassObject* newClass){ + return NULL; +} + diff --git a/vm/oo/Class.h b/vm/oo/Class.h index 349c66692..ca76f6623 100644 --- a/vm/oo/Class.h +++ b/vm/oo/Class.h @@ -19,6 +19,8 @@ #ifndef DALVIK_OO_CLASS_H_ #define DALVIK_OO_CLASS_H_ +#include "libdex/DexClass.h" + /* * The classpath and bootclasspath differ in that only the latter is * consulted when looking for classes needed by the VM. When searching @@ -281,4 +283,11 @@ int dvmCompareNameDescriptorAndMethod(const char* name, */ size_t dvmClassObjectSize(const ClassObject *clazz); +typedef struct OptClassMap{ + const char* descriptor; + void (*handleIfield) (ClassObject* newClass, Object* classLoader, const DexClassDataHeader* pHeader, const u1** pData); +} OptClassMap; + +OptClassMap* getOptClassHandler(ClassObject* newClass); + #endif // DALVIK_OO_CLASS_H_ |
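The weak getOptClassHandler stub above, like the weak dump_dalvik and genCmpLongThumb2 stubs earlier in this change, is a link-time hook: the VM carries a do-nothing default, and a separately linked vendor object that supplies a strong definition silently takes over instance-field layout for the class descriptors it recognizes. A hypothetical override, purely to illustrate the shape (the descriptor, handler, and include path are invented; the parameter types come from the Dalvik headers):

    #include <cstring>
    #include "oo/Class.h"    // OptClassMap and the getOptClassHandler declaration

    // Invented vendor handler: the real one would consume the encoded fields
    // from *pData and populate newClass->ifields with its own layout.
    static void handleFooIfields(ClassObject* newClass, Object* classLoader,
                                 const DexClassDataHeader* pHeader,
                                 const u1** pData) {
        /* vendor-specific instance-field layout */
    }

    static OptClassMap fooHook = { "Lcom/example/Foo;", handleFooIfields };

    // Strong definition: overrides the __attribute__((weak)) default in
    // vm/oo/Class.cpp when this object file is linked in.
    OptClassMap* getOptClassHandler(ClassObject* newClass) {
        if (strcmp(newClass->descriptor, fooHook.descriptor) == 0)
            return &fooHook;
        return NULL;
    }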