dalvik: Finish armv6-vfp support

Change-Id: Ie6f17e8b488c528e1c1900d7f4700110755f4dff
author: Ricardo Cerqueira <cyanogenmod@cerqueira.org> 2010-11-09 01:27:18 +0000
committer: Ricardo Cerqueira <cyanogenmod@cerqueira.org> 2010-11-09 01:27:18 +0000
commit: 27ed151a61235e67a22629df8b66a01bc16e4502 (patch)
tree: acea55cdf3e34ef259a3b40351f484f786947806
parent: c8257fdc5455bd26cc20f12e6c6cdb9377fda20a (diff)
download: android_dalvik-27ed151a61235e67a22629df8b66a01bc16e4502.tar.gz
android_dalvik-27ed151a61235e67a22629df8b66a01bc16e4502.tar.bz2
android_dalvik-27ed151a61235e67a22629df8b66a01bc16e4502.zip
38 files changed, 2522 insertions, 59 deletions
diff --git a/vm/compiler/codegen/arm/armv6-vfp/ArchVariant.c b/vm/compiler/codegen/arm/armv6-vfp/ArchVariant.c
new file mode 100644
index 000000000..7f9fa3ba9
--- /dev/null
+++ b/vm/compiler/codegen/arm/armv6-vfp/ArchVariant.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file is included by armv6-vfp/Codegen.c, and implements architecture
+ * variant-specific code.
+ */
+
+/*
+ * Determine the initial instruction set to be used for this trace (this
+ * variant always returns DALVIK_JIT_THUMB). Later components may change it.
+ */
+JitInstructionSetType dvmCompilerInstructionSet(void)
+{
+    return DALVIK_JIT_THUMB;
+}
+
+/* Architecture-specific initializations and checks go here; returns true */
+bool dvmCompilerArchVariantInit(void)
+{
+    /* First, declare dvmCompiler_TEMPLATE_XXX for each template */
+#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X();
+#include "../../../template/armv5te-vfp/TemplateOpList.h"
+#undef JIT_TEMPLATE
+
+    int i = 0;
+    extern void dvmCompilerTemplateStart(void);
+
+    /*
+     * Then, populate the templateEntryOffsets array with the offsets from
+     * the dvmCompilerTemplateStart symbol for each template.
+     */
+#define JIT_TEMPLATE(X) templateEntryOffsets[i++] = \
+    (intptr_t) dvmCompiler_TEMPLATE_##X - (intptr_t) dvmCompilerTemplateStart;
+#include "../../../template/armv5te-vfp/TemplateOpList.h"
+#undef JIT_TEMPLATE
+
+    /* Target-specific configuration */
+    gDvmJit.jitTableSize = 1 << 9; // 512
+    gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
+    gDvmJit.threshold = 200;
+    gDvmJit.codeCacheSize = 512*1024; // 512 KB
+
+#if defined(WITH_SELF_VERIFICATION)
+    /* Force into blocking mode */
+    gDvmJit.blockingMode = true;
+    gDvm.nativeDebuggerActive = true;
+#endif
+
+    /* Codegen-specific assumptions */
+    assert(offsetof(ClassObject, vtable) < 128 &&
+           (offsetof(ClassObject, vtable) & 0x3) == 0);
+    assert(offsetof(ArrayObject, length) < 128 &&
+           (offsetof(ArrayObject, length) & 0x3) == 0);
+    assert(offsetof(ArrayObject, contents) < 256);
+
+    /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */
+    assert(sizeof(StackSaveArea) < 236);
+
+    /*
+     * EA is calculated by doing "Rn + imm5 << 2", make sure that the last
+     * offset from the struct is less than 128.
+     */
+    assert((offsetof(InterpState, jitToInterpEntries) +
+            sizeof(struct JitToInterpEntries)) <= 128);
+    return true;
+}
+
+int dvmCompilerTargetOptHint(int key)
+{
+    int res;    /* hint value returned for 'key' */
+    switch (key) {
+        case kMaxHoistDistance:
+            res = 2;
+            break;
+        default:
+            LOGE("Unknown target optimization hint key: %d",key);
+            res = 0;    /* unrecognized keys are logged and yield 0 */
+    }
+    return res;
+}
diff --git a/vm/compiler/codegen/arm/armv6-vfp/ArchVariant.h b/vm/compiler/codegen/arm/armv6-vfp/ArchVariant.h
new file mode 100644
index 000000000..9f862e840
--- /dev/null
+++ b/vm/compiler/codegen/arm/armv6-vfp/ArchVariant.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DALVIK_VM_COMPILER_CODEGEN_ARM_ARMV6_VFP_ARCHVARIANT_H
+#define _DALVIK_VM_COMPILER_CODEGEN_ARM_ARMV6_VFP_ARCHVARIANT_H
+
+/* Create the TemplateOpCode enum: one TEMPLATE_<X> per JIT_TEMPLATE(X) */
+#define JIT_TEMPLATE(X) TEMPLATE_##X,
+typedef enum {
+#include "../../../template/armv5te-vfp/TemplateOpList.h"
+/*
+ * For example,
+ *     TEMPLATE_CMP_LONG,
+ *     TEMPLATE_RETURN,
+ *     ...
+ */
+    TEMPLATE_LAST_MARK,
+} TemplateOpCode;
+#undef JIT_TEMPLATE
+
+#endif /* _DALVIK_VM_COMPILER_CODEGEN_ARM_ARMV6_VFP_ARCHVARIANT_H */
diff --git a/vm/compiler/codegen/arm/armv6-vfp/CallingConvention.S b/vm/compiler/codegen/arm/armv6-vfp/CallingConvention.S
new file mode 100644
index 000000000..4f1239564
--- /dev/null
+++ b/vm/compiler/codegen/arm/armv6-vfp/CallingConvention.S
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Save & restore for callee-save FP registers.
+ * On entry:
+ *    r0 : pointer to save area of JIT_CALLEE_SAVE_WORD_SIZE
+ */
+    .text
+    .align 2
+    .global dvmJitCalleeSave
+    .type dvmJitCalleeSave, %function
+dvmJitCalleeSave:
+    vstmia r0, {d8-d15}                 @ store d8-d15 to the save area at r0
+    bx     lr
+
+    .global dvmJitCalleeRestore
+    .type dvmJitCalleeRestore, %function
+dvmJitCalleeRestore:
+    vldmia r0, {d8-d15}                 @ reload d8-d15 from the save area at r0
+    bx     lr
diff --git a/vm/compiler/codegen/arm/armv6-vfp/Codegen.c b/vm/compiler/codegen/arm/armv6-vfp/Codegen.c
new file mode 100644
index 000000000..04bb3a2b7
--- /dev/null
+++ b/vm/compiler/codegen/arm/armv6-vfp/Codegen.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+#include "interp/InterpDefs.h"
+#include "libdex/OpCode.h"
+#include "dexdump/OpCodeNames.h"
+#include "compiler/CompilerInternals.h"
+#include "compiler/codegen/arm/ArmLIR.h"
+#include "mterp/common/FindInterface.h"
+#include "compiler/codegen/arm/Ralloc.h"
+#include "compiler/codegen/arm/Codegen.h"
+#include "compiler/Loop.h"
+#include "ArchVariant.h"
+
+/* Architectural independent building blocks */
+#include "../CodegenCommon.c"
+
+/* Thumb-specific factory utilities */
+#include "../Thumb/Factory.c"
+/* Factory utilities dependent on arch-specific features */
+#include "../CodegenFactory.c"
+
+/* Thumb-specific codegen routines */
+#include "../Thumb/Gen.c"
+/* Thumb+VFP codegen routines */
+#include "../FP/ThumbVFP.c"
+
+/* Thumb-specific register allocation */
+#include "../Thumb/Ralloc.c"
+
+/* MIR2LIR dispatcher and architectural independent codegen routines */
+#include "../CodegenDriver.c"
+
+/* Architecture manifest */
+#include "ArchVariant.c"
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_ADD_DOUBLE_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_ADD_DOUBLE_VFP.S
new file mode 100644
index 000000000..51693fa0e
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_ADD_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinopWide.S" {"instr":"faddd   d2, d0, d1"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_ADD_FLOAT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_ADD_FLOAT_VFP.S
new file mode 100644
index 000000000..ad1e12211
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_ADD_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinop.S" {"instr":"fadds   s2, s0, s1"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S
new file mode 100644
index 000000000..1b143a952
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S
@@ -0,0 +1,34 @@
+%verify "executed"
+%verify "basic lt, gt, eq"
+%verify "left arg NaN"
+%verify "right arg NaN"
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     *
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    fldd    d0, [r0]                    @ d0<- vBB
+    fldd    d1, [r1]                    @ d1<- vCC
+    fcmpd  d0, d1                       @ compare (vBB, vCC)
+    mov     r0, #1                      @ r0<- 1 (default)
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr
+
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_CMPG_FLOAT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_CMPG_FLOAT_VFP.S
new file mode 100644
index 000000000..0510ef69d
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_CMPG_FLOAT_VFP.S
@@ -0,0 +1,32 @@
+%verify "executed"
+%verify "basic lt, gt, eq"
+%verify "left arg NaN"
+%verify "right arg NaN"
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {
+     *         return 1;
+     *     }
+     * }
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    flds    s0, [r0]                    @ s0<- vBB
+    flds    s1, [r1]                    @ s1<- vCC
+    fcmps  s0, s1                      @ compare (vBB, vCC)
+    mov     r0, #1                      @ r0<- 1 (default)
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S
new file mode 100644
index 000000000..7241af14e
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S
@@ -0,0 +1,32 @@
+%verify "executed"
+%verify "basic lt, gt, eq"
+%verify "left arg NaN"
+%verify "right arg NaN"
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    fldd    d0, [r0]                    @ d0<- vBB
+    fldd    d1, [r1]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    mvn     r0, #0                      @ r0<- -1 (default)
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_CMPL_FLOAT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_CMPL_FLOAT_VFP.S
new file mode 100644
index 000000000..bdb42d60f
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_CMPL_FLOAT_VFP.S
@@ -0,0 +1,32 @@
+%verify "executed"
+%verify "basic lt, gt, eq"
+%verify "left arg NaN"
+%verify "right arg NaN"
+    /*
+     * Compare two floating-point values.  Puts 0, 1, or -1 into the
+     * destination register based on the results of the comparison.
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {
+     *         return -1;
+     *     }
+     * }
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    flds    s0, [r0]                    @ s0<- vBB
+    flds    s1, [r1]                    @ s1<- vCC
+    fcmps  s0, s1                      @ compare (vBB, vCC)
+    mvn     r0, #0                      @ r0<- -1 (default)
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_DIV_DOUBLE_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_DIV_DOUBLE_VFP.S
new file mode 100644
index 000000000..8fa58b86a
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_DIV_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinopWide.S" {"instr":"fdivd   d2, d0, d1"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_DIV_FLOAT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_DIV_FLOAT_VFP.S
new file mode 100644
index 000000000..fc125ce6a
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_DIV_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinop.S" {"instr":"fdivs   s2, s0, s1"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S
new file mode 100644
index 000000000..dba3b082f
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funopNarrower.S" {"instr":"fcvtsd  s0, d0"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S
new file mode 100644
index 000000000..4d910aadd
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funopNarrower.S" {"instr":"ftosizd  s0, d0"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S
new file mode 100644
index 000000000..a5157dd8b
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funopWider.S" {"instr":"fcvtds  d0, s0"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S
new file mode 100644
index 000000000..90900aa1b
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funop.S" {"instr":"ftosizs s1, s0"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S
new file mode 100644
index 000000000..c9f4fd688
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funopWider.S" {"instr":"fsitod  d0, s0"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S
new file mode 100644
index 000000000..a8f57b505
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/funop.S" {"instr":"fsitos  s1, s0"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_MEM_OP_DECODE.S b/vm/compiler/template/armv6-vfp/TEMPLATE_MEM_OP_DECODE.S
new file mode 100644
index 000000000..21e23a9ae
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_MEM_OP_DECODE.S
@@ -0,0 +1,19 @@
+#if defined(WITH_SELF_VERIFICATION)
+    /*
+     * This handler encapsulates heap memory ops for selfVerification mode.
+     *
+     * The call to the handler is inserted prior to a heap memory operation.
+     * This handler then calls a function to decode the memory op, and process
+     * it accordingly. Afterwards, the handler changes the return address to
+     * skip the memory op so it never gets executed.
+     */
+    vpush   {d0-d15}                    @ save out all fp registers
+    push    {r0-r12,lr}                 @ save out all registers
+    mov     r0, lr                      @ arg0 <- link register
+    mov     r1, sp                      @ arg1 <- stack pointer
+    ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
+    blx     r2                          @ decode and handle the mem op
+    pop     {r0-r12,lr}                 @ restore all registers
+    vpop    {d0-d15}                    @ restore all fp registers
+    bx      lr                          @ return to compiled code
+#endif
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_MUL_DOUBLE_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_MUL_DOUBLE_VFP.S
new file mode 100644
index 000000000..459e7960e
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_MUL_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinopWide.S" {"instr":"fmuld   d2, d0, d1"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_MUL_FLOAT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_MUL_FLOAT_VFP.S
new file mode 100644
index 000000000..301fa8436
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_MUL_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinop.S" {"instr":"fmuls   s2, s0, s1"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_RESTORE_STATE.S b/vm/compiler/template/armv6-vfp/TEMPLATE_RESTORE_STATE.S
new file mode 100644
index 000000000..ec8013977
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_RESTORE_STATE.S
@@ -0,0 +1,11 @@
+    /*
+     * This handler restores state following a selfVerification memory access.
+     * On entry:
+     *    r0 - offset from rGLUE to the 1st element of the coreRegs save array.
+     */
+    add     r0, r0, rGLUE               @ pointer to heapArgSpace.coreRegs[0]
+    add     r0, #64                     @ pointer to heapArgSpace.fpRegs[0]
+    vldmia  r0, {d0-d15}                @ restore all fp registers
+    sub     r0, #64                     @ pointer to heapArgSpace.coreRegs[0]
+    ldmia   r0, {r0-r12}                @ restore r0-r12 (r0 is overwritten last)
+    bx      lr
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_SAVE_STATE.S b/vm/compiler/template/armv6-vfp/TEMPLATE_SAVE_STATE.S
new file mode 100644
index 000000000..1bd02c878
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_SAVE_STATE.S
@@ -0,0 +1,23 @@
+    /*
+     * This handler performs a register save for selfVerification mode.
+     * On entry:
+     *    Top of stack + 4: r7 value to save
+     *    Top of stack + 0: r0 value to save
+     *    r0 - offset from rGLUE to the beginning of the heapArgSpace record
+     *    r7 - the value of regMap
+     *
+     * The handler must save regMap, r0-r12 and then return with r0-r12
+     * with their original values (note that this means r0 and r7 must take
+     * the values on the stack - not the ones in those registers on entry).
+     * Finally, the two registers previously pushed must be popped.
+     */
+    add     r0, r0, rGLUE               @ pointer to heapArgSpace
+    stmia   r0!, {r7}                   @ save regMap
+    ldr     r7, [r13, #0]               @ recover r0 value
+    stmia   r0!, {r7}                   @ save r0
+    ldr     r7, [r13, #4]               @ recover r7 value
+    stmia   r0!, {r1-r12}               @ save r1-r12 (r7 holds recovered value)
+    add     r0, #12                     @ move to start of FP save region
+    vstmia  r0, {d0-d15}                @ save all fp registers
+    pop     {r0, r7}                    @ recover r0, r7
+    bx      lr
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S
new file mode 100644
index 000000000..1c6bb467b
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S
@@ -0,0 +1,23 @@
+%verify "executed"
+    /*
+     * 64-bit floating point vfp sqrt operation.
+     * If the result is a NaN, bail out to library code to do
+     * the right thing.
+     *
+     * On entry:
+     *     r2 src addr of op1
+     * On exit:
+     *     r0,r1 = res
+     */
+    fldd    d0, [r2]     @ d0<- op1
+    fsqrtd  d1, d0       @ d1<- sqrt(d0)
+    fcmpd   d1, d1       @ result equals itself unless it is a NaN
+    fmstat               @ export status flags
+    fmrrd   r0, r1, d1   @ r0,r1<- result
+    bxeq    lr   @ Result OK - return
+    ldr     r2, .Lsqrt
+    fmrrd   r0, r1, d0   @ reload orig operand
+    bx      r2   @ tail call to sqrt library routine
+
+.Lsqrt:
+    .word   sqrt
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_SUB_DOUBLE_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_SUB_DOUBLE_VFP.S
new file mode 100644
index 000000000..8fa20a028
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_SUB_DOUBLE_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinopWide.S" {"instr":"fsubd   d2, d0, d1"}
diff --git a/vm/compiler/template/armv6-vfp/TEMPLATE_SUB_FLOAT_VFP.S b/vm/compiler/template/armv6-vfp/TEMPLATE_SUB_FLOAT_VFP.S
new file mode 100644
index 000000000..5e17e514c
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TEMPLATE_SUB_FLOAT_VFP.S
@@ -0,0 +1,2 @@
+%verify "executed"
+%include "armv5te-vfp/fbinop.S" {"instr":"fsubs   s2, s0, s1"}
diff --git a/vm/compiler/template/armv6-vfp/TemplateOpList.h b/vm/compiler/template/armv6-vfp/TemplateOpList.h
new file mode 100644
index 000000000..d991bedb0
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/TemplateOpList.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Dalvik opcode list that uses additional templates to complete JIT execution.
+ */
+#ifndef JIT_TEMPLATE
+#define JIT_TEMPLATE(X)
+#endif
+
+JIT_TEMPLATE(CMP_LONG)
+JIT_TEMPLATE(RETURN)
+JIT_TEMPLATE(INVOKE_METHOD_NO_OPT)
+JIT_TEMPLATE(INVOKE_METHOD_CHAIN)
+JIT_TEMPLATE(INVOKE_METHOD_PREDICTED_CHAIN)
+JIT_TEMPLATE(INVOKE_METHOD_NATIVE)
+JIT_TEMPLATE(MUL_LONG)
+JIT_TEMPLATE(SHL_LONG)
+JIT_TEMPLATE(SHR_LONG)
+JIT_TEMPLATE(USHR_LONG)
+JIT_TEMPLATE(ADD_FLOAT_VFP)
+JIT_TEMPLATE(SUB_FLOAT_VFP)
+JIT_TEMPLATE(MUL_FLOAT_VFP)
+JIT_TEMPLATE(DIV_FLOAT_VFP)
+JIT_TEMPLATE(ADD_DOUBLE_VFP)
+JIT_TEMPLATE(SUB_DOUBLE_VFP)
+JIT_TEMPLATE(MUL_DOUBLE_VFP)
+JIT_TEMPLATE(DIV_DOUBLE_VFP)
+JIT_TEMPLATE(DOUBLE_TO_FLOAT_VFP)
+JIT_TEMPLATE(DOUBLE_TO_INT_VFP)
+JIT_TEMPLATE(FLOAT_TO_DOUBLE_VFP)
+JIT_TEMPLATE(FLOAT_TO_INT_VFP)
+JIT_TEMPLATE(INT_TO_DOUBLE_VFP)
+JIT_TEMPLATE(INT_TO_FLOAT_VFP)
+JIT_TEMPLATE(CMPG_DOUBLE_VFP)
+JIT_TEMPLATE(CMPL_DOUBLE_VFP)
+JIT_TEMPLATE(CMPG_FLOAT_VFP)
+JIT_TEMPLATE(CMPL_FLOAT_VFP)
+JIT_TEMPLATE(SQRT_DOUBLE_VFP)
+JIT_TEMPLATE(THROW_EXCEPTION_COMMON)
+JIT_TEMPLATE(MEM_OP_DECODE)
+JIT_TEMPLATE(STRING_COMPARETO)
+JIT_TEMPLATE(STRING_INDEXOF)
+JIT_TEMPLATE(INTERPRET)
+JIT_TEMPLATE(MONITOR_ENTER)
+JIT_TEMPLATE(MONITOR_ENTER_DEBUG)
diff --git a/vm/compiler/template/armv6-vfp/fbinop.S b/vm/compiler/template/armv6-vfp/fbinop.S
new file mode 100644
index 000000000..3bc4b52a9
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/fbinop.S
@@ -0,0 +1,14 @@
+    /*
+     * Generic 32-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs s2 = s0 op s1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     flds    s0,[r1]
+     flds    s1,[r2]
+     $instr
+     fsts    s2,[r0]
+     bx      lr
diff --git a/vm/compiler/template/armv6-vfp/fbinopWide.S b/vm/compiler/template/armv6-vfp/fbinopWide.S
new file mode 100644
index 000000000..3774646bf
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/fbinopWide.S
@@ -0,0 +1,14 @@
+    /*
+     * Generic 64-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs d2 = d0 op d1.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     fldd    d0,[r1]
+     fldd    d1,[r2]
+     $instr
+     fstd    d2,[r0]
+     bx      lr
diff --git a/vm/compiler/template/armv6-vfp/funop.S b/vm/compiler/template/armv6-vfp/funop.S
new file mode 100644
index 000000000..8409c287c
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/funop.S
@@ -0,0 +1,15 @@
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s1 = op s0".
+     *
+     * For: float-to-int, int-to-float
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB
+    $instr                              @ s1<- op s0
+    fsts    s1, [r0]                    @ vA<- s1
+    bx      lr
diff --git a/vm/compiler/template/armv6-vfp/funopNarrower.S b/vm/compiler/template/armv6-vfp/funopNarrower.S
new file mode 100644
index 000000000..8566fcaf2
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/funopNarrower.S
@@ -0,0 +1,15 @@
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    fldd    d0, [r1]                    @ d0<- vB
+    $instr                              @ s0<- op d0
+    fsts    s0, [r0]                    @ vA<- s0
+    bx      lr
diff --git a/vm/compiler/template/armv6-vfp/funopWider.S b/vm/compiler/template/armv6-vfp/funopWider.S
new file mode 100644
index 000000000..dbe745c9b
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/funopWider.S
@@ -0,0 +1,15 @@
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB
+    $instr                              @ d0<- op s0
+    fstd    d0, [r0]                    @ vA<- d0
+    bx      lr
diff --git a/vm/compiler/template/armv6-vfp/platform.S b/vm/compiler/template/armv6-vfp/platform.S
new file mode 100644
index 000000000..880e8750f
--- /dev/null
+++ b/vm/compiler/template/armv6-vfp/platform.S
@@ -0,0 +1,16 @@
+/*
+ * ===========================================================================
+ *  CPU-version-specific defines and utility
+ * ===========================================================================
+ */
+
+/*
+ * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
+ * Jump to subroutine.
+ *
+ * May modify IP and LR.
+ */
+.macro  LDR_PC_LR source
+    mov     lr, pc
+    ldr     pc, \source
+.endm
diff --git a/vm/compiler/template/config-armv6-vfp b/vm/compiler/template/config-armv6-vfp
new file mode 100644
index 000000000..1b02261eb
--- /dev/null
+++ b/vm/compiler/template/config-armv6-vfp
@@ -0,0 +1,62 @@
+
+# Copyright (C) 2009 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Configuration for ARMv6 architecture targets with VFP.
+#
+
+# file header and basic definitions
+#import c/header.c
+import armv5te/header.S
+
+# C pre-processor defines for stub C instructions
+#import cstubs/stubdefs.c
+
+# highly-platform-specific defs
+import armv5te-vfp/platform.S
+
+# common defs for the C helpers; include this before the instruction handlers
+#import c/opcommon.c
+
+# opcode list; argument to op-start is default directory
+op-start armv5te-vfp
+    op TEMPLATE_CMP_LONG armv5te
+    op TEMPLATE_INVOKE_METHOD_CHAIN armv5te
+    op TEMPLATE_INVOKE_METHOD_NATIVE armv5te
+    op TEMPLATE_INVOKE_METHOD_NO_OPT armv5te
+    op TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN armv5te
+    op TEMPLATE_MUL_LONG armv5te
+    op TEMPLATE_RETURN armv5te
+    op TEMPLATE_SHL_LONG armv5te
+    op TEMPLATE_SHR_LONG armv5te
+    op TEMPLATE_USHR_LONG armv5te
+    op TEMPLATE_THROW_EXCEPTION_COMMON armv5te
+    op TEMPLATE_STRING_COMPARETO armv5te
+    op TEMPLATE_STRING_INDEXOF armv5te
+    op TEMPLATE_INTERPRET armv5te
+    op TEMPLATE_MONITOR_ENTER armv5te
+    op TEMPLATE_MONITOR_ENTER_DEBUG armv5te
+
+op-end
+
+# "helper" code for C; include if you use any of the C stubs (this generates
+# object code, so it's normally excluded)
+##import c/gotoTargets.c
+
+# end of defs; include this when cstubs/stubdefs.c is included
+#import cstubs/enddefs.c
+
+# common subroutines for asm
+import armv5te/footer.S
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv6-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv6-vfp.S
new file mode 100644
index 000000000..4f5e52b10
--- /dev/null
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv6-vfp.S
@@ -0,0 +1,1543 @@
+/*
+ * This file was generated automatically by gen-template.py for 'armv6-vfp'.
+ *
+ * --> DO NOT EDIT <--
+ */
+
+/* File: armv5te/header.S */
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(WITH_JIT)
+
+/*
+ * ARMv5 definitions and declarations.
+ */
+
+/*
+ARM EABI general notes:
+
+r0-r3 hold first 4 args to a method; they are not preserved across method calls
+r4-r8 are available for general use
+r9 is given special treatment in some situations, but not for us
+r10 (sl) seems to be generally available
+r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+r12 (ip) is scratch -- not preserved across method calls
+r13 (sp) should be managed carefully in case a signal arrives
+r14 (lr) must be preserved
+r15 (pc) can be tinkered with directly
+
+r0 holds returns of <= 4 bytes
+r0-r1 hold returns of 8 bytes, low word in r0
+
+Callee must save/restore r4+ (except r12) if it modifies them.
+
+Stack is "full descending".  Only the arguments that don't fit in the first 4
+registers are placed on the stack.  "sp" points at the first stacked argument
+(i.e. the 5th arg).
+
+VFP: single-precision results in s0, double-precision results in d0.
+
+In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+64-bit quantities (long long, double) must be 64-bit aligned.
+*/
+
+/*
+JIT and ARM notes:
+
+The following registers have fixed assignments:
+
+  reg nick      purpose
+  r5  rFP       interpreted frame pointer, used for accessing locals and args
+  r6  rGLUE     MterpGlue pointer
+
+The following registers have fixed assignments in mterp but are scratch
+registers in compiled code
+
+  reg nick      purpose
+  r4  rPC       interpreted program counter, used for fetching instructions
+  r7  rINST     first 16-bit code unit of current instruction
+  r8  rIBASE    interpreted instruction base pointer, used for computed goto
+
+Macros are provided for common operations.  Each macro MUST emit only
+one instruction to make instruction-counting easier.  They MUST NOT alter
+unspecified registers or condition codes.
+*/
+
+/*
+ * single-purpose registers, given names for clarity
+ *
+ * Per the register tables above: rFP and rGLUE keep their fixed assignments
+ * in compiled code, while rPC, rINST and rIBASE are fixed only in mterp and
+ * are scratch registers in compiled code.
+ */
+#define rPC     r4
+#define rFP     r5
+#define rGLUE   r6
+#define rINST   r7
+#define rIBASE  r8
+
+/*
+ * Given a frame pointer, find the stack save area.
+ *
+ * In C this is "((StackSaveArea*)(_fp) -1)".
+ *
+ * Expands to a single "sub": _reg <- _fpreg - sizeofStackSaveArea.
+ */
+#define SAVEAREA_FROM_FP(_reg, _fpreg) \
+    sub     _reg, _fpreg, #sizeofStackSaveArea
+
+/*
+ * Store rPC into the currentPc field of the stack save area that sits
+ * directly below rFP.  Expands to a single "str".
+ */
+#define EXPORT_PC() \
+    str     rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "../../../mterp/common/asm-constants.h"
+
+
+/* File: armv5te-vfp/platform.S */
+/*
+ * ===========================================================================
+ *  CPU-version-specific defines and utility
+ * ===========================================================================
+ */
+
+/*
+ * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
+ * Jump to subroutine.
+ *
+ * "source" is the memory operand the branch target is loaded from.  The
+ * return address is captured in LR before PC is overwritten by the load.
+ *
+ * May modify IP and LR.
+ */
+.macro  LDR_PC_LR source
+    mov     lr, pc
+    ldr     pc, \source
+.endm
+
+
+    /*
+     * Exported label placed in .text immediately before the first template;
+     * all dvmCompiler_TEMPLATE_* entry points follow it.
+     */
+    .global dvmCompilerTemplateStart
+    .type   dvmCompilerTemplateStart, %function
+    .text
+
+dvmCompilerTemplateStart:
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMP_LONG
+dvmCompiler_TEMPLATE_CMP_LONG:
+/* File: armv5te/TEMPLATE_CMP_LONG.S */
+    /*
+     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
+     * register based on the results of the comparison.
+     *
+     * In this template the operands are compared straight from registers:
+     * r0/r1 hold the low/high words of the first value, r2/r3 the low/high
+     * words of the second.  The result is left in r0 and control returns
+     * through lr.  No LDM is performed here; the LDM remarks below describe
+     * the interpreter version this code was derived from.
+     *
+     * We load the full values with LDM, but in practice many values could
+     * be resolved by only looking at the high word.  This could be made
+     * faster or slower by splitting the LDM into a pair of LDRs.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
+     */
+    /* cmp-long vAA, vBB, vCC */
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
+    bgt     .LTEMPLATE_CMP_LONG_greater
+    subs    r0, r0, r2                  @ r0<- r0 - r2
+    bxeq     lr                         @ low words equal too: r0 is already 0
+    bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
+.LTEMPLATE_CMP_LONG_less:
+    mvn     r0, #0                      @ r0<- -1
+    bx      lr
+.LTEMPLATE_CMP_LONG_greater:
+    mov     r0, #1                      @ r0<- 1
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_RETURN
+dvmCompiler_TEMPLATE_RETURN:
+/* File: armv5te/TEMPLATE_RETURN.S */
+    /*
+     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
+     * If the stored value in returnAddr
+     * is non-zero, the caller is compiled by the JIT thus return to the
+     * address in the code cache following the invoke instruction. Otherwise
+     * return to the special dvmJitToInterpNoChain entry point.
+     *
+     * The chaining-cell address is also dropped (forcing the interpreter
+     * path) when the thread's suspendCount is non-zero.
+     */
+    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
+    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
+#if !defined(WITH_SELF_VERIFICATION)
+    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
+#else
+    mov     r9, #0                      @ disable chaining
+#endif
+    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
+                                        @ r2<- method we're returning to
+    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
+    cmp     r2, #0                      @ break frame?
+#if !defined(WITH_SELF_VERIFICATION)
+    beq     1f                          @ bail to interpreter
+#else
+    blxeq   lr                          @ punt to interpreter and compare state
+#endif
+    ldr     r1, .LdvmJitToInterpNoChain @ defined in footer.S
+    mov     rFP, r10                    @ publish new FP
+    @ condition flags here are still those of "cmp r2, #0" above
+    ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz
+    ldr     r8, [r8]                    @ r8<- suspendCount
+
+    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
+    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
+    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
+    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
+    str     r0, [rGLUE, #offGlue_methodClassDex]
+    cmp     r8, #0                      @ check the suspendCount
+    movne   r9, #0                      @ clear the chaining cell address
+    str     r9, [r3, #offThread_inJitCodeCache] @ in code cache or not
+    cmp     r9, #0                      @ chaining cell exists?
+    blxne   r9                          @ jump to the chaining cell
+#if defined(JIT_STATS)
+    mov     r0, #kCallsiteInterpreted
+#endif
+    mov     pc, r1                      @ callsite is interpreted
+1:                                      @ break frame: leave via dvmMterpStdBail
+    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
+    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
+    mov     r1, #0                      @ changeInterp = false
+    mov     r0, rGLUE                   @ Expecting rGLUE in r0
+    blx     r2                          @ exit the interpreter
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
+dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
+    /*
+     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
+     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
+     * runtime-resolved callee.
+     *
+     * Returns through lr instead (without switching frames) when the new
+     * frame would overflow the interpreter stack or when suspendCount is
+     * non-zero.  Native callees are routed to .LinvokeNative unless
+     * WITH_SELF_VERIFICATION is defined.
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    lr                          @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0
+    bxne    lr                          @ bail to the interpreter
+    tst     r10, #ACC_NATIVE
+#if !defined(WITH_SELF_VERIFICATION)
+    bne     .LinvokeNative
+#else
+    bxne    lr                          @ bail to the interpreter
+#endif
+
+    ldr     r10, .LdvmJitToInterpTraceSelectNoChain
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
+
+    @ Update "glue" values for the new method
+    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
+    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
+
+    @ Start executing the callee
+#if defined(JIT_STATS)
+    mov     r0, #kInlineCacheMiss
+#endif
+    mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
+dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
+    /*
+     * For monomorphic callsite, setup the Dalvik frame and return to the
+     * Thumb code through the link register to transfer control to the callee
+     * method through a dedicated chaining cell.
+     *
+     * Both bail-out paths (stack overflow, suspendCount != 0) return to
+     * lr + 2, which is kept in r12; the success path returns to lr.
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    @ methodToCall is guaranteed to be non-native
+.LinvokeChain:                          @ also entered from the predicted-chain template
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    add     r12, lr, #2                 @ setup the punt-to-interp address
+    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    r12                         @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0
+    bxne    r12                         @ bail to the interpreter
+
+    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
+    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
+
+    @ Update "glue" values for the new method
+    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
+    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
+    mov     rFP, r1                         @ fp = newFp
+    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
+
+    bx      lr                              @ return to the callee-chaining cell
+
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
+dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
+    /*
+     * For polymorphic callsite, check whether the cached class pointer matches
+     * the current one. If so setup the Dalvik frame and return to the
+     * Thumb code through the link register to transfer control to the callee
+     * method through a dedicated chaining cell.
+     *
+     * The predicted chaining cell is declared in ArmLIR.h with the
+     * following layout:
+     *
+     *  typedef struct PredictedChainingCell {
+     *      u4 branch;
+     *      const ClassObject *clazz;
+     *      const Method *method;
+     *      u4 counter;
+     *  } PredictedChainingCell;
+     *
+     * The literal offsets 4, 8 and 12 used below address the clazz, method
+     * and counter fields of that struct respectively.
+     *
+     * Upon returning to the callsite:
+     *    - lr  : to branch to the chaining cell
+     *    - lr+2: to punt to the interpreter
+     *    - lr+4: to fully resolve the callee and may rechain.
+     *            r3 <- class
+     *            r9 <- counter
+     */
+    @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
+    ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
+    ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
+    ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
+    ldr     r9, [r2, #12]   @ r9 <- predictedChainCell->counter
+    cmp     r3, r8          @ predicted class == actual class?
+    beq     .LinvokeChain   @ predicted chain is valid
+    @ misprediction: decrement the cell's counter and return to lr+4
+    ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
+    sub     r1, r9, #1      @ count--
+    str     r1, [r2, #12]   @ write back to PredictedChainingCell->counter
+    add     lr, lr, #4      @ return to fully-resolve landing pad
+    /*
+     * r1 <- count
+     * r2 <- &predictedChainCell
+     * r3 <- this->class
+     * r4 <- dPC
+     * r7 <- this->class->vtable
+     */
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
+dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
+/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
+    /*
+     * Build the callee frame for a native method, call method->nativeFunc
+     * directly from compiled code, then restore the JNI local-reference
+     * cookie and either return to the chaining cell or continue in the
+     * interpreter.  Returns through lr without calling if the stack check
+     * fails or suspendCount is non-zero (always, under
+     * WITH_SELF_VERIFICATION).
+     */
+    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
+    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
+    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
+    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
+    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
+    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
+    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
+    ldr     r8, [r8]                    @ r8<- suspendCount (int)
+    cmp     r10, r9                     @ bottom < interpStackEnd?
+    bxlt    lr                          @ return to raise stack overflow excep.
+    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
+    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
+    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
+
+
+    @ set up newSaveArea
+    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
+    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
+    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
+    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
+    cmp     r8, #0                      @ suspendCount != 0
+    ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
+#if !defined(WITH_SELF_VERIFICATION)
+    bxne    lr                          @ bail to the interpreter
+#else
+    bx      lr                          @ bail to interpreter unconditionally
+#endif
+
+    @ go ahead and transfer control to the native code
+    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
+    mov     r2, #0
+    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
+    str     r2, [r3, #offThread_inJitCodeCache] @ not in the jit code cache
+    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
+                                        @ newFp->localRefCookie=top
+    mov     r9, r3                      @ r9<- glue->self (preserve)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
+
+    @ native call arguments: r0 = newFP, r1 = &retval, r2 = methodToCall;
+    @ r3 still holds glue->self from the load above
+    mov     r2, r0                      @ r2<- methodToCall
+    mov     r0, r1                      @ r0<- newFP
+    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
+
+    blx     r8                          @ off to the native code
+
+    @ native return; r9=self, r10=newSaveArea
+    @ equivalent to dvmPopJniLocals
+    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
+    ldr     r1, [r9, #offThread_exception] @ check for exception
+    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
+    cmp     r1, #0                      @ null?
+    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
+    ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
+
+    @ r0 = dalvikCallsitePC
+    bne     .LhandleException           @ no, handle exception
+
+    str     r2, [r9, #offThread_inJitCodeCache] @ set the mode properly
+    cmp     r2, #0                      @ return chaining cell still exists?
+    bxne    r2                          @ yes - go ahead
+
+    @ continue executing the next instruction through the interpreter
+    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
+    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
+#if defined(JIT_STATS)
+    mov     r0, #kCallsiteInterpreted
+#endif
+    mov     pc, r1
+
+
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MUL_LONG
+dvmCompiler_TEMPLATE_MUL_LONG:
+/* File: armv5te/TEMPLATE_MUL_LONG.S */
+    /*
+     * Signed 64-bit integer multiply.
+     *
+     * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
+     *
+     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+     *        WX
+     *      x YZ
+     *  --------
+     *     ZW ZX
+     *  YW YX
+     *
+     * The low word of the result holds ZX, the high word holds
+     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
+     * it doesn't fit in the low 64 bits.
+     *
+     * Unlike most ARM math operations, multiply instructions have
+     * restrictions on using the same register more than once (Rd and Rm
+     * cannot be the same).
+     *
+     * The product is built in r9/r10 and copied to r0/r1 at the end; ip, r2,
+     * r9 and r10 are overwritten.
+     */
+    /* mul-long vAA, vBB, vCC */
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
+    mov     r0,r9                       @  r0<- low word of the result
+    mov     r1,r10                      @  r1<- high word of the result
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SHL_LONG
+dvmCompiler_TEMPLATE_SHL_LONG:
+/* File: armv5te/TEMPLATE_SHL_LONG.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     *
+     * On entry r0/r1 hold the low/high words of the value and r2 the shift
+     * distance; the shifted value is returned in r0/r1.  r3 and ip are
+     * overwritten.
+     */
+    /* shl-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >>> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    bx      lr
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SHR_LONG
+dvmCompiler_TEMPLATE_SHR_LONG:
+/* File: armv5te/TEMPLATE_SHR_LONG.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     *
+     * On entry r0/r1 hold the low/high words of the value and r2 the shift
+     * distance; the shifted value is returned in r0/r1.  The high word is
+     * shifted with asr, so the sign is propagated.  r3 and ip are overwritten.
+     */
+    /* shr-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_USHR_LONG
+dvmCompiler_TEMPLATE_USHR_LONG:
+/* File: armv5te/TEMPLATE_USHR_LONG.S */
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     *
+     * On entry r0/r1 hold the low/high words of the value and r2 the shift
+     * distance; the shifted value is returned in r0/r1.  Both words are
+     * shifted with lsr, so zeros are shifted in.  r3 and ip are overwritten.
+     */
+    /* ushr-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
+dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
+/* File: armv5te-vfp/fbinop.S */
+    /*
+     * Generic 32-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs s2 = s0 op s1.
+     *
+     * This instance: single-precision add, *r0 = *r1 + *r2.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     flds    s0,[r1]                    @ s0<- op1
+     flds    s1,[r2]                    @ s1<- op2
+     fadds   s2, s0, s1                 @ s2<- s0 + s1
+     fsts    s2,[r0]                    @ target<- s2
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
+dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
+/* File: armv5te-vfp/fbinop.S */
+    /*
+     * Generic 32-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs s2 = s0 op s1.
+     *
+     * This instance: single-precision subtract, *r0 = *r1 - *r2.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     flds    s0,[r1]                    @ s0<- op1
+     flds    s1,[r2]                    @ s1<- op2
+     fsubs   s2, s0, s1                 @ s2<- s0 - s1
+     fsts    s2,[r0]                    @ target<- s2
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
+dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
+/* File: armv5te-vfp/fbinop.S */
+    /*
+     * Generic 32-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs s2 = s0 op s1.
+     *
+     * This instance: single-precision multiply, *r0 = *r1 * *r2.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     flds    s0,[r1]                    @ s0<- op1
+     flds    s1,[r2]                    @ s1<- op2
+     fmuls   s2, s0, s1                 @ s2<- s0 * s1
+     fsts    s2,[r0]                    @ target<- s2
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
+dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
+/* File: armv5te-vfp/fbinop.S */
+    /*
+     * Generic 32-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs s2 = s0 op s1.
+     *
+     * This instance: single-precision divide, *r0 = *r1 / *r2.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     flds    s0,[r1]                    @ s0<- op1
+     flds    s1,[r2]                    @ s1<- op2
+     fdivs   s2, s0, s1                 @ s2<- s0 / s1
+     fsts    s2,[r0]                    @ target<- s2
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
+dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
+/* File: armv5te-vfp/fbinopWide.S */
+    /*
+     * Generic 64-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs d2 = d0 op d1.
+     *
+     * This instance: double-precision add, *r0 = *r1 + *r2.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     fldd    d0,[r1]                    @ d0<- op1
+     fldd    d1,[r2]                    @ d1<- op2
+     faddd   d2, d0, d1                 @ d2<- d0 + d1
+     fstd    d2,[r0]                    @ target<- d2
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
+dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
+/* File: armv5te-vfp/fbinopWide.S */
+    /*
+     * Generic 64-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs d2 = d0 op d1.
+     *
+     * This instance: double-precision subtract, *r0 = *r1 - *r2.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     fldd    d0,[r1]                    @ d0<- op1
+     fldd    d1,[r2]                    @ d1<- op2
+     fsubd   d2, d0, d1                 @ d2<- d0 - d1
+     fstd    d2,[r0]                    @ target<- d2
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
+dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
+/* File: armv5te-vfp/fbinopWide.S */
+    /*
+     * Generic 64-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs d2 = d0 op d1.
+     *
+     * This instance: double-precision multiply, *r0 = *r1 * *r2.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     fldd    d0,[r1]                    @ d0<- op1
+     fldd    d1,[r2]                    @ d1<- op2
+     fmuld   d2, d0, d1                 @ d2<- d0 * d1
+     fstd    d2,[r0]                    @ target<- d2
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
+dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
+/* File: armv5te-vfp/fbinopWide.S */
+    /*
+     * Generic 64-bit floating point operation.  Provide an "instr" line that
+     * specifies an instruction that performs d2 = d0 op d1.
+     *
+     * This instance: double-precision divide, *r0 = *r1 / *r2.
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = op1 address
+     *     r2 = op2 address
+     */
+     fldd    d0,[r1]                    @ d0<- op1
+     fldd    d1,[r2]                    @ d1<- op2
+     fdivd   d2, d0, d1                 @ d2<- d0 / d1
+     fstd    d2,[r0]                    @ target<- d2
+     bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
+dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
+/* File: armv5te-vfp/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     *
+     * This instance: double-to-float (fcvtsd).
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    fldd    d0, [r1]                    @ d0<- vB
+    fcvtsd  s0, d0                              @ s0<- (float) d0
+    fsts    s0, [r0]                    @ vA<- s0
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
+dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
+/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
+/* File: armv5te-vfp/funopNarrower.S */
+    /*
+     * Generic 64bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s0 = op d0".
+     *
+     * For: double-to-int, double-to-float
+     *
+     * This instance: double-to-int (ftosizd, which rounds toward zero).
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    fldd    d0, [r1]                    @ d0<- vB
+    ftosizd  s0, d0                              @ s0<- (signed int) d0
+    fsts    s0, [r0]                    @ vA<- s0
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
+dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
+/* File: armv5te-vfp/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "d0 = op s0".
+     *
+     * For: int-to-double, float-to-double
+     *
+     * This instance: float-to-double (fcvtds).
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB
+    fcvtds  d0, s0                              @ d0<- (double) s0
+    fstd    d0, [r0]                    @ vA<- d0
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
+dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
+/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
+/* File: armv5te-vfp/funop.S */
+    /*
+     * Generic 32bit-to-32bit floating point unary operation.  Provide an
+     * "instr" line that specifies an instruction that performs "s1 = op s0".
+     *
+     * For: float-to-int, int-to-float
+     *
+     * This instance: float-to-int (ftosizs, which rounds toward zero).
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB
+    ftosizs s1, s0                              @ s1<- (signed int) s0
+    fsts    s1, [r0]                    @ vA<- s1
+    bx      lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
+dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
+/* File: armv5te-vfp/funopWider.S */
+    /*
+     * Generic 32bit-to-64bit floating point unary operation, expanded here
+     * with "instr" = fsitod, which performs "d0 = (double) s0" (s0 = int).
+     *
+     * For: int-to-double (the generic template also serves float-to-double)
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB
+    fsitod  d0, s0                              @ d0<- (double) signed int in s0
+    fstd    d0, [r0]                    @ vA<- d0
+    bx      lr                          @ return to the caller
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
+dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
+/* File: armv5te-vfp/funop.S */
+    /*
+     * Generic 32bit-to-32bit floating point unary operation, expanded here
+     * with "instr" = fsitos, which performs "s1 = (float) s0" (s0 = int).
+     *
+     * For: int-to-float (the generic template also serves float-to-int)
+     *
+     * On entry:
+     *     r0 = target dalvik register address
+     *     r1 = src dalvik register address
+     */
+    /* unop vA, vB */
+    flds    s0, [r1]                    @ s0<- vB
+    fsitos  s1, s0                              @ s1<- (float) signed int in s0
+    fsts    s1, [r0]                    @ vA<- s1
+    bx      lr                          @ return to the caller
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
+dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
+    /*
+     * Compare two double-precision values.  Puts 0, 1, or -1 into r0
+     * based on the results of the comparison ("greater" bias on NaN).
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {               // unordered: x or y is NaN
+     *         return 1;
+     *     }
+     * }
+     *
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    fldd    d0, [r0]                    @ d0<- vBB
+    fldd    d1, [r1]                    @ d1<- vCC
+    fcmpd  d0, d1                       @ compare (vBB, vCC)
+    mov     r0, #1                      @ r0<- 1 (default: greater or unordered)
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr                          @ return with the result in r0
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
+dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
+    /*
+     * Compare two double-precision values.  Puts 0, 1, or -1 into r0
+     * based on the results of the comparison ("less" bias on NaN).
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {               // unordered: x or y is NaN
+     *         return -1;
+     *     }
+     * }
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    fldd    d0, [r0]                    @ d0<- vBB
+    fldd    d1, [r1]                    @ d1<- vCC
+    fcmped  d0, d1                      @ compare (vBB, vCC)
+    mvn     r0, #0                      @ r0<- -1 (default: less or unordered)
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr                          @ return with the result in r0
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
+dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
+    /*
+     * Compare two single-precision values.  Puts 0, 1, or -1 into r0
+     * based on the results of the comparison ("greater" bias on NaN).
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else {               // unordered: x or y is NaN
+     *         return 1;
+     *     }
+     * }
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    flds    s0, [r0]                    @ s0<- vBB
+    flds    s1, [r1]                    @ s1<- vCC
+    fcmps  s0, s1                      @ compare (vBB, vCC)
+    mov     r0, #1                      @ r0<- 1 (default: greater or unordered)
+    fmstat                              @ export status flags
+    mvnmi   r0, #0                      @ (less than) r0<- -1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr                          @ return with the result in r0
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
+dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
+/* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
+    /*
+     * Compare two single-precision values.  Puts 0, 1, or -1 into r0
+     * based on the results of the comparison ("less" bias on NaN).
+     *
+     * int compare(x, y) {
+     *     if (x == y) {
+     *         return 0;
+     *     } else if (x > y) {
+     *         return 1;
+     *     } else if (x < y) {
+     *         return -1;
+     *     } else {               // unordered: x or y is NaN
+     *         return -1;
+     *     }
+     * }
+     * On entry:
+     *    r0 = &op1 [vBB]
+     *    r1 = &op2 [vCC]
+     */
+    /* op vAA, vBB, vCC */
+    flds    s0, [r0]                    @ s0<- vBB
+    flds    s1, [r1]                    @ s1<- vCC
+    fcmps  s0, s1                      @ compare (vBB, vCC)
+    mvn     r0, #0                      @ r0<- -1 (default: less or unordered)
+    fmstat                              @ export status flags
+    movgt   r0, #1                      @ (greater than) r0<- 1
+    moveq   r0, #0                      @ (equal) r0<- 0
+    bx      lr                          @ return with the result in r0
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
+dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
+/* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
+    /*
+     * 64-bit floating point vfp sqrt operation.
+     * If the result is a NaN, bail out to library code to do
+     * the right thing.
+     *
+     * On entry:
+     *     r2 src addr of op1
+     * On exit:
+     *     r0,r1 = res
+     */
+    fldd    d0, [r2]     @ d0<- operand
+    fsqrtd  d1, d0       @ d1<- sqrt(d0)
+    fcmpd   d1, d1       @ self-compare: not equal only when d1 is NaN
+    fmstat               @ export status flags
+    fmrrd   r0, r1, d1   @ r0/r1<- vfp result (flags are left untouched)
+    bxeq    lr   @ Result OK - return
+    ldr     r2, .Lsqrt   @ r2<- address of sqrt
+    fmrrd   r0, r1, d0   @ reload orig operand
+    bx      r2   @ tail call to sqrt library routine
+
+.Lsqrt:
+    .word   sqrt         @ literal: address of the sqrt routine
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
+dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
+/* File: armv5te/TEMPLATE_THROW_EXCEPTION_COMMON.S */
+    /*
+     * Throw an exception from JIT'ed code.
+     * On entry:
+     *    r0    Dalvik PC that raises the exception
+     */
+    b       .LhandleException           @ r0 is passed through unchanged
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
+dvmCompiler_TEMPLATE_MEM_OP_DECODE:
+/* File: armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S */
+#if defined(WITH_SELF_VERIFICATION) /* the template body is empty otherwise */
+    /*
+     * This handler encapsulates heap memory ops for selfVerification mode.
+     *
+     * The call to the handler is inserted prior to a heap memory operation.
+     * This handler then calls a function to decode the memory op, and process
+     * it accordingly. Afterwards, the handler changes the return address to
+     * skip the memory op so it never gets executed.
+     */
+    vpush   {d0-d15}                    @ save out all fp registers
+    push    {r0-r12,lr}                 @ save out all registers
+    mov     r0, lr                      @ arg0 <- link register
+    mov     r1, sp                      @ arg1 <- stack pointer (saved r0-r12,lr)
+    ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
+    blx     r2                          @ decode and handle the mem op
+    pop     {r0-r12,lr}                 @ restore all registers (lr from the stack)
+    vpop    {d0-d15}                    @ restore all fp registers
+    bx      lr                          @ return to compiled code
+#endif /* WITH_SELF_VERIFICATION */
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
+dvmCompiler_TEMPLATE_STRING_COMPARETO:
+/* File: armv5te/TEMPLATE_STRING_COMPARETO.S */
+    /*
+     * String's compareTo.
+     *
+     * Requires r0/r1 to have been previously checked for null.  Will
+     * return negative if this string is < comp, 0 if they are the
+     * same and positive if >.  The result is returned in r0.
+     *
+     * IMPORTANT NOTE:
+     *
+     * This code relies on hard-coded offsets for string objects, and must be
+     * kept in sync with definitions in UtfString.h.  See asm-constants.h
+     *
+     * On entry:
+     *    r0:   this object pointer
+     *    r1:   comp object pointer
+     *
+     */
+
+    mov    r2, r0         @ this to r2, opening up r0 for return value
+    subs   r0, r2, r1     @ Same?
+    bxeq   lr             @ same object: return 0 (already in r0)
+
+    ldr    r4, [r2, #STRING_FIELDOFF_OFFSET]    @ r4<- this.offset
+    ldr    r9, [r1, #STRING_FIELDOFF_OFFSET]    @ r9<- comp.offset
+    ldr    r7, [r2, #STRING_FIELDOFF_COUNT]     @ r7<- this.count
+    ldr    r10, [r1, #STRING_FIELDOFF_COUNT]    @ r10<- comp.count
+    ldr    r2, [r2, #STRING_FIELDOFF_VALUE]     @ r2<- this.value
+    ldr    r1, [r1, #STRING_FIELDOFF_VALUE]     @ r1<- comp.value
+
+    /*
+     * At this point, we have:
+     *    value:  r2/r1
+     *    offset: r4/r9
+     *    count:  r7/r10
+     * We're going to compute
+     *    r11 <- countDiff
+     *    r10 <- minCount
+     */
+     subs  r11, r7, r10   @ r11<- this.count - comp.count
+     movls r10, r7        @ this.count <= comp.count (unsigned): r10<- this.count
+
+     /* Now, build pointers to the string data */
+     add   r2, r2, r4, lsl #1     @ r2<- this.value + this.offset * 2
+     add   r1, r1, r9, lsl #1     @ r1<- comp.value + comp.offset * 2
+     /*
+      * Note: data pointers point to previous element so we can use pre-index
+      * mode with base writeback.
+      */
+     add   r2, #16-2   @ offset to contents[-1]
+     add   r1, #16-2   @ offset to contents[-1]
+
+     /*
+      * At this point we have:
+      *   r2: *this string data
+      *   r1: *comp string data
+      *   r10: iteration count for comparison
+      *   r11: value to return if the first part of the string is equal
+      *   r0: reserved for result
+      *   r3, r4, r7, r8, r9, r12 available for loading string data
+      */
+
+    subs  r10, #2             @ bias the count by the two unrolled checks
+    blt   do_remainder2       @ fewer than 2 chars to compare
+
+      /*
+       * Unroll the first two checks so we can quickly catch early mismatch
+       * on long strings (but preserve incoming alignment)
+       */
+
+    ldrh  r3, [r2, #2]!       @ r3<- this char 0
+    ldrh  r4, [r1, #2]!       @ r4<- comp char 0
+    ldrh  r7, [r2, #2]!       @ r7<- this char 1
+    ldrh  r8, [r1, #2]!       @ r8<- comp char 1
+    subs  r0, r3, r4          @ r0<- difference of the first pair
+    subeqs  r0, r7, r8        @ first pair equal: r0<- difference of the second
+    bxne  lr                  @ mismatch: return the difference
+    cmp   r10, #28            @ more than 28 chars still to compare?
+    bgt   do_memcmp16         @ yes: hand the rest to __memcmp16
+    subs  r10, #3             @ bias the count for the 3-at-a-time loop
+    blt   do_remainder        @ fewer than 3 chars left
+
+loopback_triple:
+    ldrh  r3, [r2, #2]!
+    ldrh  r4, [r1, #2]!
+    ldrh  r7, [r2, #2]!
+    ldrh  r8, [r1, #2]!
+    ldrh  r9, [r2, #2]!
+    ldrh  r12,[r1, #2]!
+    subs  r0, r3, r4          @ each subeqs runs only while pairs stay equal
+    subeqs  r0, r7, r8
+    subeqs  r0, r9, r12
+    bxne  lr                  @ mismatch: return the first nonzero difference
+    subs  r10, #3
+    bge   loopback_triple     @ at least 3 more chars to compare
+
+do_remainder:
+    adds  r10, #3             @ undo the bias: r10<- chars left (0..2)
+    beq   returnDiff          @ nothing left: result is countDiff
+
+loopback_single:
+    ldrh  r3, [r2, #2]!
+    ldrh  r4, [r1, #2]!
+    subs  r0, r3, r4          @ r0<- difference of this pair
+    bxne  lr                  @ mismatch: return the difference
+    subs  r10, #1
+    bne     loopback_single
+
+returnDiff:
+    mov   r0, r11             @ common prefix equal: return countDiff
+    bx    lr
+
+do_remainder2:
+    adds  r10, #2             @ undo the bias: r10<- chars left (0 or 1)
+    bne   loopback_single     @ one char to compare
+    mov   r0, r11             @ nothing to compare: return countDiff
+    bx    lr
+
+    /* Long string case */
+do_memcmp16:
+    mov   r4, lr              @ keep the return address in r4 across the call
+    ldr   lr, .Lmemcmp16      @ lr<- address of __memcmp16
+    mov   r7, r11             @ keep countDiff in r7 across the call
+    add   r0, r2, #2          @ r0<- next unread char of this
+    add   r1, r1, #2          @ r1<- next unread char of comp
+    mov   r2, r10             @ r2<- number of chars left to compare
+    blx   lr                  @ call __memcmp16 (assumed to preserve r4/r7)
+    cmp   r0, #0              @ did __memcmp16 find a difference?
+    bxne  r4                  @ yes: return its result
+    mov   r0, r7              @ no: return countDiff
+    bx    r4
+
+.Lmemcmp16:
+    .word __memcmp16
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
+dvmCompiler_TEMPLATE_STRING_INDEXOF:
+/* File: armv5te/TEMPLATE_STRING_INDEXOF.S */
+    /*
+     * String's indexOf.
+     *
+     * Requires r0 to have been previously checked for null.  Will
+     * return index of match of r1 in r0, or -1 if there is no match.
+     *
+     * IMPORTANT NOTE:
+     *
+     * This code relies on hard-coded offsets for string objects, and must be
+     * kept in sync with definitions in UtfString.h.  See asm-constants.h
+     *
+     * On entry:
+     *    r0:   string object pointer
+     *    r1:   char to match
+     *    r2:   Starting offset in string data
+     */
+
+    ldr    r7, [r0, #STRING_FIELDOFF_OFFSET]    @ r7<- string offset field
+    ldr    r8, [r0, #STRING_FIELDOFF_COUNT]     @ r8<- string count field
+    ldr    r0, [r0, #STRING_FIELDOFF_VALUE]     @ r0<- string value field
+
+    /*
+     * At this point, we have:
+     *    r0: value (char data) object pointer
+     *    r1: char to match
+     *    r2: starting offset
+     *    r7: offset
+     *    r8: string length
+     */
+
+     /* Build pointer to start of string data */
+     add   r0, #16
+     add   r0, r0, r7, lsl #1
+
+     /* Save a copy of starting data in r7 */
+     mov   r7, r0
+
+     /* Clamp start to [0..count] */
+     cmp   r2, #0
+     movlt r2, #0
+     cmp   r2, r8
+     movgt r2, r8
+
+     /* Build pointer to start of data to compare and pre-bias */
+     add   r0, r0, r2, lsl #1
+     sub   r0, #2             @ pre-bias for the pre-indexed loads below
+
+     /* Compute iteration count */
+     sub   r8, r2
+
+     /*
+      * At this point we have:
+      *   r0: start of data to test
+      *   r1: char to compare
+      *   r8: iteration count
+      *   r7: original start of string
+      *   r3, r4, r9, r10, r11, r12 available for loading string data
+      */
+
+    subs  r8, #4              @ bias the count for the 4-at-a-time loop
+    blt   indexof_remainder   @ fewer than 4 chars to test
+
+indexof_loop4:
+    ldrh  r3, [r0, #2]!
+    ldrh  r4, [r0, #2]!
+    ldrh  r10, [r0, #2]!
+    ldrh  r11, [r0, #2]!      @ r0 now points at the 4th char of this group
+    cmp   r3, r1
+    beq   match_0
+    cmp   r4, r1
+    beq   match_1
+    cmp   r10, r1
+    beq   match_2
+    cmp   r11, r1
+    beq   match_3
+    subs  r8, #4
+    bge   indexof_loop4       @ at least 4 more chars to test
+
+indexof_remainder:
+    adds    r8, #4            @ undo the bias: r8<- chars left (0..3)
+    beq     indexof_nomatch
+
+indexof_loop1:
+    ldrh  r3, [r0, #2]!       @ r0 points at the char just loaded
+    cmp   r3, r1
+    beq   match_3             @ match_3 applies no pointer adjustment
+    subs  r8, #1
+    bne   indexof_loop1
+
+indexof_nomatch:
+    mov   r0, #-1             @ not found: return -1
+    bx    lr
+
+match_0:
+    sub   r0, #6              @ step back 3 chars to the matching one
+    sub   r0, r7              @ r0<- byte distance from start of string
+    asr   r0, r0, #1          @ r0<- char index (2 bytes per char)
+    bx    lr
+match_1:
+    sub   r0, #4              @ step back 2 chars to the matching one
+    sub   r0, r7
+    asr   r0, r0, #1
+    bx    lr
+match_2:
+    sub   r0, #2              @ step back 1 char to the matching one
+    sub   r0, r7
+    asr   r0, r0, #1
+    bx    lr
+match_3:
+    sub   r0, r7              @ r0 already points at the matching char
+    asr   r0, r0, #1
+    bx    lr
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_INTERPRET
+dvmCompiler_TEMPLATE_INTERPRET:
+/* File: armv5te/TEMPLATE_INTERPRET.S */
+    /*
+     * This handler transfers control to the interpreter without performing
+     * any lookups.  It may be called either as part of a normal chaining
+     * operation, or from the transition code in header.S.  We distinguish
+     * the two cases by looking at the link register.  If called from a
+     * translation chain, it will point to the chaining Dalvik PC + 1.
+     * On entry:
+     *    lr - if NULL:
+     *        r1 - the Dalvik PC to begin interpretation.
+     *    else
+     *        [lr, #-1] contains Dalvik PC to begin interpretation
+     *    rGLUE - pointer to interpState
+     *    rFP - Dalvik frame pointer
+     */
+    cmp     lr, #0                       @ called from a translation chain?
+    ldrne   r1,[lr, #-1]                 @ yes: r1<- Dalvik PC stored at lr-1
+    ldr     r2, .LinterpPunt             @ r2<- address of dvmJitToInterpPunt
+    mov     r0, r1                       @ set Dalvik PC
+    bx      r2
+    @ doesn't return
+
+.LinterpPunt:
+    .word   dvmJitToInterpPunt
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
+dvmCompiler_TEMPLATE_MONITOR_ENTER:
+/* File: armv5te/TEMPLATE_MONITOR_ENTER.S */
+    /*
+     * Call out to the runtime to lock an object.  Because this thread
+     * may have been suspended in THREAD_MONITOR state and the Jit's
+     * translation cache subsequently cleared, we cannot return directly.
+     * Instead, unconditionally transition to the interpreter to resume.
+     *
+     * On entry:
+     *    r0 - self pointer
+     *    r1 - the object (which has already been null-checked by the caller)
+     *    r4 - the Dalvik PC of the following instruction.
+     */
+    ldr     r2, .LdvmLockObject          @ r2<- address of dvmLockObject
+    mov     r3, #0                       @ Record that we're not returning
+    str     r3, [r0, #offThread_inJitCodeCache]
+    blx     r2                           @ dvmLockObject(self, obj)
+    @ refresh Jit's on/off status
+    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
+    ldr     r0, [r0]                     @ r0<- current Jit profile table pointer
+    ldr     r2, .LdvmJitToInterpNoChain  @ r2<- address of dvmJitToInterpNoChain
+    str     r0, [rGLUE, #offGlue_pJitProfTable] @ cache it in the glue struct
+    @ Bail to interpreter - no chain [note - r4 still contains rPC]
+#if defined(JIT_STATS)
+    mov     r0, #kHeavyweightMonitor
+#endif
+    bx      r2                           @ jump to dvmJitToInterpNoChain
+
+
+/* ------------------------------ */
+    .balign 4
+    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
+dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
+/* File: armv5te/TEMPLATE_MONITOR_ENTER_DEBUG.S */
+    /*
+     * To support deadlock prediction, this version of MONITOR_ENTER
+     * will always call the heavyweight dvmLockObject, check for an
+     * exception and then bail out to the interpreter.
+     *
+     * On entry:
+     *    r0 - self pointer
+     *    r1 - the object (which has already been null-checked by the caller)
+     *    r4 - the Dalvik PC of the following instruction.
+     *
+     */
+    ldr     r2, .LdvmLockObject          @ r2<- address of dvmLockObject
+    mov     r3, #0                       @ Record that we're not returning
+    str     r3, [r0, #offThread_inJitCodeCache]
+    blx     r2             @ dvmLockObject(self, obj)
+    @ refresh Jit's on/off status & test for exception
+    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
+    ldr     r1, [rGLUE, #offGlue_self]
+    ldr     r0, [r0]                     @ r0<- current Jit profile table pointer
+    ldr     r1, [r1, #offThread_exception] @ r1<- self->exception
+    str     r0, [rGLUE, #offGlue_pJitProfTable] @ cache it in the glue struct
+    cmp     r1, #0                       @ exception pending?
+    beq     1f                           @ no: bail to the interpreter
+    @ .LhandleException is a code label, not a literal-pool word: branch to it
+    sub     r0, r4, #2     @ roll dPC back to this monitor instruction
+    b       .LhandleException            @ r0 = faulting Dalvik PC
+1:
+    @ Bail to interpreter - no chain [note - r4 still contains rPC]
+#if defined(JIT_STATS)
+    mov     r0, #kHeavyweightMonitor
+#endif
+    ldr     pc, .LdvmJitToInterpNoChain
+
+    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
+/* File: armv5te/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+    .text
+    .align  2
+.LinvokeNative:
+    @ Prep for the native call
+    @ r1 = newFP, r0 = methodToCall
+    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
+    mov     r2, #0
+    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
+    str     r2, [r3, #offThread_inJitCodeCache] @ not in jit code cache
+    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
+    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
+                                        @ newFp->localRefCookie=top
+    mov     r9, r3                      @ r9<- glue->self (preserve)
+    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
+
+    mov     r2, r0                      @ r2<- methodToCall
+    mov     r0, r1                      @ r0<- newFP
+    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
+
+    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
+
+    @ Refresh Jit's on/off status
+    ldr     r3, [rGLUE, #offGlue_ppJitProfTable]
+
+    @ native return; r9=self, r10=newSaveArea
+    @ equivalent to dvmPopJniLocals
+    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
+    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
+    ldr     r1, [r9, #offThread_exception] @ check for exception
+    ldr     r3, [r3]    @ r3 <- pointer to Jit profile table
+    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
+    cmp     r1, #0                      @ null?
+    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
+    ldr     r0, [r10, #offStackSaveArea_savedPc] @ r0<- saved Dalvik PC
+    str     r3, [rGLUE, #offGlue_pJitProfTable]  @ cache current JitProfTable
+
+    @ r0 = dalvikCallsitePC
+    bne     .LhandleException           @ exception pending - handle it
+
+    str     r2, [r9, #offThread_inJitCodeCache] @ set the new mode
+    cmp     r2, #0                      @ return chaining cell still exists?
+    bxne    r2                          @ yes - go ahead
+
+    @ continue executing the next instruction through the interpreter
+    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
+    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
+#if defined(JIT_STATS)
+    mov     r0, #kCallsiteInterpreted
+#endif
+    mov     pc, r1                      @ jump to dvmJitToInterpTraceSelectNoChain
+
+/*
+ * Hand a pending exception over to the interpreter's common handler.
+ * On entry:
+ * r0  Faulting Dalvik PC
+ */
+.LhandleException:
+#if defined(WITH_SELF_VERIFICATION)
+    ldr     pc, .LdeadFood @ should not see this under self-verification mode
+.LdeadFood:
+    .word   0xdeadf00d
+#endif
+    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
+    mov     r2, #0
+    str     r2, [r3, #offThread_inJitCodeCache] @ in interpreter land
+    ldr     r1, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
+    ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above
+    mov     rPC, r0                 @ reload the faulting Dalvik address
+    mov     pc, r1                  @ branch to dvmMterpCommonExceptionThrown
+
+    .align  2
+.LdvmAsmInstructionStart:
+    .word   dvmAsmInstructionStart
+.LdvmJitToInterpTraceSelectNoChain:
+    .word   dvmJitToInterpTraceSelectNoChain
+.LdvmJitToInterpNoChain:
+    .word   dvmJitToInterpNoChain
+.LdvmMterpStdBail:
+    .word   dvmMterpStdBail
+.LdvmMterpCommonExceptionThrown:
+    .word   dvmMterpCommonExceptionThrown
+.LdvmLockObject:
+    .word   dvmLockObject
+#if defined(WITH_SELF_VERIFICATION)
+.LdvmSelfVerificationMemOpDecode:
+    .word   dvmSelfVerificationMemOpDecode
+#endif
+.L__aeabi_cdcmple:
+    .word   __aeabi_cdcmple
+.L__aeabi_cfcmple:
+    .word   __aeabi_cfcmple
+
+    @ NOTE(review): the "dmv" spelling is presumably what external users of
+    @ this symbol expect - confirm before renaming it.
+    .global dmvCompilerTemplateEnd
+dmvCompilerTemplateEnd:
+
+#endif /* WITH_JIT */
+
diff --git a/vm/compiler/template/rebuild.sh b/vm/compiler/template/rebuild.sh
index 5533151b9..99da922f8 100755
--- a/vm/compiler/template/rebuild.sh
+++ b/vm/compiler/template/rebuild.sh
@@ -19,5 +19,5 @@
 # generated as part of the build.
 #
 set -e
-for arch in armv5te armv5te-vfp armv7-a armv7-a-neon armv6j; do TARGET_ARCH_EXT=$arch make -f Makefile-template; done
+for arch in armv5te armv5te-vfp armv7-a armv7-a-neon armv6j armv6-vfp; do TARGET_ARCH_EXT=$arch make -f Makefile-template; done
 
diff --git a/vm/mterp/out/InterpAsm-armv6-vfp.S b/vm/mterp/out/InterpAsm-armv6-vfp.S
index 1bbef7b39..ebc06061e 100644
--- a/vm/mterp/out/InterpAsm-armv6-vfp.S
+++ b/vm/mterp/out/InterpAsm-armv6-vfp.S
@@ -185,8 +185,8 @@ unspecified registers or condition codes.
 #define SET_VREG(_reg, _vreg)   str     _reg, [rFP, _vreg, lsl #2]
 
 #if defined(WITH_JIT)
-#define GET_JIT_ENABLED(_reg)       ldr     _reg,[rGLUE,#offGlue_jitEnabled]
 #define GET_JIT_PROF_TABLE(_reg)    ldr     _reg,[rGLUE,#offGlue_pJitProfTable]
+#define GET_JIT_THRESHOLD(_reg)     ldr     _reg,[rGLUE,#offGlue_jitThreshold]
 #endif
 
 /*
@@ -201,6 +201,9 @@ unspecified registers or condition codes.
  */
 #include "../common/asm-constants.h"
 
+#if defined(WITH_JIT)
+#include "../common/jit-config.h"
+#endif
 
 /* File: armv5te/platform.S */
 /*
@@ -303,17 +306,20 @@ dvmMterpStdRun:
 
     /* set up "named" registers, figure out entry point */
     mov     rGLUE, r0                   @ set rGLUE
-    ldrb    r1, [r0, #offGlue_entryPoint]   @ InterpEntry enum is char
+    ldr     r1, [r0, #offGlue_entryPoint]   @ enum is 4 bytes in aapcs-EABI
     LOAD_PC_FP_FROM_GLUE()              @ load rPC and rFP from "glue"
     adr     rIBASE, dvmAsmInstructionStart  @ set rIBASE
     cmp     r1, #kInterpEntryInstr      @ usual case?
     bne     .Lnot_instr                 @ no, handle it
 
 #if defined(WITH_JIT)
-.Lno_singleStep:
+.LentryInstr:
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
     /* Entry is always a possible trace start */
     GET_JIT_PROF_TABLE(r0)
     FETCH_INST()
+    mov    r1, #0                       @ prepare the value for the new state
+    str    r1, [r10, #offThread_inJitCodeCache] @ back to the interp land
     cmp    r0,#0
     bne    common_updateProfile
     GET_INST_OPCODE(ip)
@@ -335,18 +341,21 @@ dvmMterpStdRun:
 
 #if defined(WITH_JIT)
 .Lnot_throw:
-    ldr     r0,[rGLUE, #offGlue_jitResume]
-    ldr     r2,[rGLUE, #offGlue_jitResumePC]
+    ldr     r10,[rGLUE, #offGlue_jitResumeNPC]
+    ldr     r2,[rGLUE, #offGlue_jitResumeDPC]
     cmp     r1, #kInterpEntryResume     @ resuming after Jit single-step?
     bne     .Lbad_arg
     cmp     rPC,r2
-    bne     .Lno_singleStep             @ must have branched, don't resume
+    bne     .LentryInstr                @ must have branched, don't resume
+#if defined(WITH_SELF_VERIFICATION)
+    @ glue->entryPoint will be set in dvmSelfVerificationSaveState
+    b       jitSVShadowRunStart         @ re-enter the translation after the
+                                        @ single-stepped instruction
+    @noreturn
+#endif
     mov     r1, #kInterpEntryInstr
-    strb    r1, [rGLUE, #offGlue_entryPoint]
-    ldr     rINST, .LdvmCompilerTemplate
-    bx      r0                          @ re-enter the translation
-.LdvmCompilerTemplate:
-    .word   dvmCompilerTemplateStart
+    str     r1, [rGLUE, #offGlue_entryPoint]
+    bx      r10                         @ re-enter the translation
 #endif
 
 .Lbad_arg:
@@ -902,14 +911,17 @@ dalvik_inst:
     EXPORT_PC()                         @ before fetch: export the PC
     GET_VREG(r1, r2)                    @ r1<- vAA (object)
     cmp     r1, #0                      @ null object?
-    beq     common_errNullObject        @ yes
+    beq     1f                          @ yes
     ldr     r0, [rGLUE, #offGlue_self]  @ r0<- glue->self
     bl      dvmUnlockObject             @ r0<- success for unlock(self, obj)
     cmp     r0, #0                      @ failed?
-    beq     common_exceptionThrown      @ yes, exception is pending
     FETCH_ADVANCE_INST(1)               @ before throw: advance rPC, load rINST
+    beq     common_exceptionThrown      @ yes, exception is pending
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
+1:
+    FETCH_ADVANCE_INST(1)               @ advance before throw
+    b      common_errNullObject
 
 
 /* ------------------------------ */
@@ -7277,8 +7289,8 @@ dalvik_inst:
 
 /* ------------------------------ */
     .balign 64
-.L_OP_UNUSED_EC: /* 0xec */
-/* File: armv5te/OP_UNUSED_EC.S */
+.L_OP_BREAKPOINT: /* 0xec */
+/* File: armv5te/OP_BREAKPOINT.S */
 /* File: armv5te/unused.S */
     bl      common_abort
 
@@ -7309,17 +7321,18 @@ dalvik_inst:
     /*
      * Execute a "native inline" instruction.
      *
-     * We need to call:
-     *  dvmPerformInlineOp4Std(arg0, arg1, arg2, arg3, &retval, ref)
+     * We need to call an InlineOp4Func:
+     *  bool (func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult)
      *
-     * The first four args are in r0-r3, but the last two must be pushed
-     * onto the stack.
+     * The first four args are in r0-r3, pointer to return value storage
+     * is on the stack.  The function's return value is a flag that tells
+     * us if an exception was thrown.
      */
     /* [opt] execute-inline vAA, {vC, vD, vE, vF}, inline@BBBB */
     FETCH(r10, 1)                       @ r10<- BBBB
     add     r1, rGLUE, #offGlue_retval  @ r1<- &glue->retval
     EXPORT_PC()                         @ can throw
-    sub     sp, sp, #8                  @ make room for arg(s)
+    sub     sp, sp, #8                  @ make room for arg, +64 bit align
     mov     r0, rINST, lsr #12          @ r0<- B
     str     r1, [sp]                    @ push &glue->retval
     bl      .LOP_EXECUTE_INLINE_continue        @ make call; will return after
@@ -7332,12 +7345,33 @@ dalvik_inst:
 
 /* ------------------------------ */
     .balign 64
-.L_OP_UNUSED_EF: /* 0xef */
-/* File: armv5te/OP_UNUSED_EF.S */
-/* File: armv5te/unused.S */
-    bl      common_abort
-
-
+.L_OP_EXECUTE_INLINE_RANGE: /* 0xef */
+/* File: armv5te/OP_EXECUTE_INLINE_RANGE.S */
+    /*
+     * Execute a "native inline" instruction, using "/range" semantics.
+     * Same idea as execute-inline, but we get the args differently.
+     *
+     * We need to call an InlineOp4Func:
+     *  bool (func)(u4 arg0, u4 arg1, u4 arg2, u4 arg3, JValue* pResult)
+     *
+     * The first four args are in r0-r3, pointer to return value storage
+     * is on the stack.  The function's return value is a flag that tells
+     * us if an exception was thrown.
+     */
+    /* [opt] execute-inline/range {vCCCC..v(CCCC+AA-1)}, inline@BBBB */
+    FETCH(r10, 1)                       @ r10<- BBBB
+    add     r1, rGLUE, #offGlue_retval  @ r1<- &glue->retval
+    EXPORT_PC()                         @ can throw
+    sub     sp, sp, #8                  @ make room for arg, +64 bit align
+    mov     r0, rINST, lsr #8           @ r0<- AA
+    str     r1, [sp]                    @ push &glue->retval
+    bl      .LOP_EXECUTE_INLINE_RANGE_continue        @ make call; will return after
+    add     sp, sp, #8                  @ pop stack
+    cmp     r0, #0                      @ test boolean result of inline
+    beq     common_exceptionThrown      @ returned false, handle exception
+    FETCH_ADVANCE_INST(3)               @ advance rPC, load rINST
+    GET_INST_OPCODE(ip)                 @ extract opcode from rINST
+    GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* ------------------------------ */
     .balign 64
@@ -8983,6 +9017,36 @@ d2l_doconv:
     .word   gDvmInlineOpsTable
 
 
+/* continuation for OP_EXECUTE_INLINE_RANGE */
+
+    /*
+     * Extract args, call function.
+     *  r0 = #of args (0-4)
+     *  r10 = call index
+     *  lr = return addr, above  [DO NOT bl out of here w/o preserving LR]
+     */
+.LOP_EXECUTE_INLINE_RANGE_continue:
+    rsb     r0, r0, #4                  @ r0<- 4-r0
+    FETCH(r9, 2)                        @ r9<- CCCC
+    add     pc, pc, r0, lsl #3          @ computed goto, 2 instrs each
+    bl      common_abort                @ (skipped due to ARM prefetch)
+4:  add     ip, r9, #3                  @ base+3
+    GET_VREG(r3, ip)                    @ r3<- vBase[3]
+3:  add     ip, r9, #2                  @ base+2
+    GET_VREG(r2, ip)                    @ r2<- vBase[2]
+2:  add     ip, r9, #1                  @ base+1
+    GET_VREG(r1, ip)                    @ r1<- vBase[1]
+1:  add     ip, r9, #0                  @ (nop)
+    GET_VREG(r0, ip)                    @ r0<- vBase[0]
+0:
+    ldr     r9, .LOP_EXECUTE_INLINE_RANGE_table       @ table of InlineOperation
+    LDR_PC  "[r9, r10, lsl #4]"         @ sizeof=16, "func" is first entry
+    @ (not reached)
+
+.LOP_EXECUTE_INLINE_RANGE_table:
+    .word   gDvmInlineOpsTable
+
+
     .size   dvmAsmSisterStart, .-dvmAsmSisterStart
     .global dvmAsmSisterEnd
 dvmAsmSisterEnd:
@@ -9001,6 +9065,67 @@ dvmAsmSisterEnd:
     .align  2
 
 #if defined(WITH_JIT)
+#if defined(WITH_SELF_VERIFICATION)
+    .global dvmJitToInterpPunt
+dvmJitToInterpPunt:
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
+    mov    r2,#kSVSPunt                 @ r2<- interpreter entry point
+    mov    r3, #0
+    str    r3, [r10, #offThread_inJitCodeCache] @ Back to the interp land
+    b      jitSVShadowRunEnd            @ doesn't return
+
+    .global dvmJitToInterpSingleStep
+dvmJitToInterpSingleStep:
+    str    lr,[rGLUE,#offGlue_jitResumeNPC]
+    str    r1,[rGLUE,#offGlue_jitResumeDPC]
+    mov    r2,#kSVSSingleStep           @ r2<- interpreter entry point
+    b      jitSVShadowRunEnd            @ doesn't return
+
+    .global dvmJitToInterpTraceSelectNoChain
+dvmJitToInterpTraceSelectNoChain:
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
+    mov    r0,rPC                       @ pass our target PC
+    mov    r2,#kSVSTraceSelectNoChain   @ r2<- interpreter entry point
+    mov    r3, #0
+    str    r3, [r10, #offThread_inJitCodeCache] @ Back to the interp land
+    b      jitSVShadowRunEnd            @ doesn't return
+
+    .global dvmJitToInterpTraceSelect
+dvmJitToInterpTraceSelect:
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
+    ldr    r0,[lr, #-1]                 @ pass our target PC
+    mov    r2,#kSVSTraceSelect          @ r2<- interpreter entry point
+    mov    r3, #0
+    str    r3, [r10, #offThread_inJitCodeCache] @ Back to the interp land
+    b      jitSVShadowRunEnd            @ doesn't return
+
+    .global dvmJitToInterpBackwardBranch
+dvmJitToInterpBackwardBranch:
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
+    ldr    r0,[lr, #-1]                 @ pass our target PC
+    mov    r2,#kSVSBackwardBranch       @ r2<- interpreter entry point
+    mov    r3, #0
+    str    r3, [r10, #offThread_inJitCodeCache] @ Back to the interp land
+    b      jitSVShadowRunEnd            @ doesn't return
+
+    .global dvmJitToInterpNormal
+dvmJitToInterpNormal:
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
+    ldr    r0,[lr, #-1]                 @ pass our target PC
+    mov    r2,#kSVSNormal               @ r2<- interpreter entry point
+    mov    r3, #0
+    str    r3, [r10, #offThread_inJitCodeCache] @ Back to the interp land
+    b      jitSVShadowRunEnd            @ doesn't return
+
+    .global dvmJitToInterpNoChain
+dvmJitToInterpNoChain:
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
+    mov    r0,rPC                       @ pass our target PC
+    mov    r2,#kSVSNoChain              @ r2<- interpreter entry point
+    mov    r3, #0
+    str    r3, [r10, #offThread_inJitCodeCache] @ Back to the interp land
+    b      jitSVShadowRunEnd            @ doesn't return
+#else
 /*
  * Return from the translation cache to the interpreter when the compiler is
  * having issues translating/executing a Dalvik instruction. We have to skip
@@ -9010,12 +9135,15 @@ dvmAsmSisterEnd:
  */
     .global dvmJitToInterpPunt
 dvmJitToInterpPunt:
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
     mov    rPC, r0
-#ifdef EXIT_STATS
+#ifdef JIT_STATS
     mov    r0,lr
     bl     dvmBumpPunt;
 #endif
     EXPORT_PC()
+    mov    r0, #0
+    str    r0, [r10, #offThread_inJitCodeCache] @ Back to the interp land
     adrl   rIBASE, dvmAsmInstructionStart
     FETCH_INST()
     GET_INST_OPCODE(ip)
@@ -9030,35 +9158,58 @@ dvmJitToInterpPunt:
  */
     .global dvmJitToInterpSingleStep
 dvmJitToInterpSingleStep:
-    str    lr,[rGLUE,#offGlue_jitResume]
-    str    r1,[rGLUE,#offGlue_jitResumePC]
+    str    lr,[rGLUE,#offGlue_jitResumeNPC]
+    str    r1,[rGLUE,#offGlue_jitResumeDPC]
     mov    r1,#kInterpEntryInstr
     @ enum is 4 byte in aapcs-EABI
     str    r1, [rGLUE, #offGlue_entryPoint]
     mov    rPC,r0
     EXPORT_PC()
+
     adrl   rIBASE, dvmAsmInstructionStart
     mov    r2,#kJitSingleStep     @ Ask for single step and then revert
     str    r2,[rGLUE,#offGlue_jitState]
     mov    r1,#1                  @ set changeInterp to bail to debug interp
     b      common_gotoBail
 
+/*
+ * Return from the translation cache and immediately request
+ * a translation for the exit target.  Commonly used for callees.
+ */
+    .global dvmJitToInterpTraceSelectNoChain
+dvmJitToInterpTraceSelectNoChain:
+#ifdef JIT_STATS
+    bl     dvmBumpNoChain
+#endif
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
+    mov    r0,rPC
+    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    str    r0, [r10, #offThread_inJitCodeCache] @ set the inJitCodeCache flag
+    mov    r1, rPC                  @ arg1 of translation may need this
+    mov    lr, #0                   @  in case target is HANDLER_INTERPRET
+    cmp    r0,#0
+    bxne   r0                       @ continue native execution if so
+    b      2f
 
 /*
  * Return from the translation cache and immediately request
  * a translation for the exit target.  Commonly used following
  * invokes.
  */
-    .global dvmJitToTraceSelect
-dvmJitToTraceSelect:
-    ldr    rPC,[r14, #-1]           @ get our target PC
-    add    rINST,r14,#-5            @ save start of chain branch
+    .global dvmJitToInterpTraceSelect
+dvmJitToInterpTraceSelect:
+    ldr    rPC,[lr, #-1]           @ get our target PC
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
+    add    rINST,lr,#-5            @ save start of chain branch
     mov    r0,rPC
-    bl     dvmJitGetCodeAddr        @ Is there a translation?
+    bl     dvmJitGetCodeAddr       @ Is there a translation?
+    str    r0, [r10, #offThread_inJitCodeCache] @ set the inJitCodeCache flag
     cmp    r0,#0
     beq    2f
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    mov    r1, rPC                  @ arg1 of translation may need this
+    mov    lr, #0                   @ in case target is HANDLER_INTERPRET
     cmp    r0,#0                    @ successful chain?
     bxne   r0                       @ continue native execution
     b      toInterpreter            @ didn't chain - resume with interpreter
@@ -9069,6 +9220,7 @@ dvmJitToTraceSelect:
     GET_JIT_PROF_TABLE(r0)
     FETCH_INST()
     cmp    r0, #0
+    movne  r2,#kJitTSelectRequestHot   @ ask for trace selection
     bne    common_selectTrace
     GET_INST_OPCODE(ip)
     GOTO_OPCODE(ip)
@@ -9089,17 +9241,21 @@ dvmJitToTraceSelect:
  */
     .global dvmJitToInterpNormal
 dvmJitToInterpNormal:
-    ldr    rPC,[r14, #-1]           @ get our target PC
-    add    rINST,r14,#-5            @ save start of chain branch
-#ifdef EXIT_STATS
+    ldr    rPC,[lr, #-1]           @ get our target PC
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
+    add    rINST,lr,#-5            @ save start of chain branch
+#ifdef JIT_STATS
     bl     dvmBumpNormal
 #endif
     mov    r0,rPC
     bl     dvmJitGetCodeAddr        @ Is there a translation?
+    str    r0, [r10, #offThread_inJitCodeCache] @ set the inJitCodeCache flag
     cmp    r0,#0
     beq    toInterpreter            @ go if not, otherwise do chain
     mov    r1,rINST
     bl     dvmJitChain              @ r0<- dvmJitChain(codeAddr,chainAddr)
+    mov    r1, rPC                  @ arg1 of translation may need this
+    mov    lr, #0                   @  in case target is HANDLER_INTERPRET
     cmp    r0,#0                    @ successful chain?
     bxne   r0                       @ continue native execution
     b      toInterpreter            @ didn't chain - resume with interpreter
@@ -9110,13 +9266,18 @@ dvmJitToInterpNormal:
  */
     .global dvmJitToInterpNoChain
 dvmJitToInterpNoChain:
-#ifdef EXIT_STATS
+#ifdef JIT_STATS
     bl     dvmBumpNoChain
 #endif
+    ldr    r10, [rGLUE, #offGlue_self]  @ callee saved r10 <- glue->self
     mov    r0,rPC
     bl     dvmJitGetCodeAddr        @ Is there a translation?
+    str    r0, [r10, #offThread_inJitCodeCache] @ set the inJitCodeCache flag
+    mov    r1, rPC                  @ arg1 of translation may need this
+    mov    lr, #0                   @  in case target is HANDLER_INTERPRET
     cmp    r0,#0
     bxne   r0                       @ continue native execution if so
+#endif
 
 /*
  * No translation, restore interpreter regs and start interpreting.
@@ -9144,11 +9305,11 @@ common_testUpdateProfile:
 
 common_updateProfile:
     eor     r3,rPC,rPC,lsr #12 @ cheap, but fast hash function
-    lsl     r3,r3,#23          @ shift out excess 511
-    ldrb    r1,[r0,r3,lsr #23] @ get counter
+    lsl     r3,r3,#(32 - JIT_PROF_SIZE_LOG_2)          @ shift out excess bits
+    ldrb    r1,[r0,r3,lsr #(32 - JIT_PROF_SIZE_LOG_2)] @ get counter
     GET_INST_OPCODE(ip)
     subs    r1,r1,#1           @ decrement counter
-    strb    r1,[r0,r3,lsr #23] @ and store it
+    strb    r1,[r0,r3,lsr #(32 - JIT_PROF_SIZE_LOG_2)] @ and store it
     GOTO_OPCODE_IFNE(ip)       @ if not threshold, fallthrough otherwise */
 
 /*
@@ -9157,20 +9318,94 @@ common_updateProfile:
  * is already a native translation in place (and, if so,
  * jump to it now).
  */
-    mov     r1,#255
-    strb    r1,[r0,r3,lsr #23] @ reset counter
+    GET_JIT_THRESHOLD(r1)
+    ldr     r10, [rGLUE, #offGlue_self] @ callee saved r10 <- glue->self
+    strb    r1,[r0,r3,lsr #(32 - JIT_PROF_SIZE_LOG_2)] @ reset counter
     EXPORT_PC()
     mov     r0,rPC
     bl      dvmJitGetCodeAddr           @ r0<- dvmJitGetCodeAddr(rPC)
+    str     r0, [r10, #offThread_inJitCodeCache] @ set the inJitCodeCache flag
+    mov     r1, rPC                     @ arg1 of translation may need this
+    mov     lr, #0                      @  in case target is HANDLER_INTERPRET
     cmp     r0,#0
-    beq     common_selectTrace
+#if !defined(WITH_SELF_VERIFICATION)
     bxne    r0                          @ jump to the translation
-common_selectTrace:
     mov     r2,#kJitTSelectRequest      @ ask for trace selection
+    @ fall-through to common_selectTrace
+#else
+    moveq   r2,#kJitTSelectRequest      @ ask for trace selection
+    beq     common_selectTrace
+    /*
+     * At this point, we have a target translation.  However, if
+     * that translation is actually the interpret-only pseudo-translation
+     * we want to treat it the same as no translation.
+     */
+    mov     r10, r0                     @ save target
+    bl      dvmCompilerGetInterpretTemplate
+    cmp     r0, r10                     @ special case?
+    bne     jitSVShadowRunStart         @ set up self verification shadow space
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+    /* no return */
+#endif
+
+/*
+ * On entry:
+ *  r2 is jit state, e.g. kJitTSelectRequest or kJitTSelectRequestHot
+ */
+common_selectTrace:
     str     r2,[rGLUE,#offGlue_jitState]
+    mov     r2,#kInterpEntryInstr       @ normal entry reason
+    str     r2,[rGLUE,#offGlue_entryPoint]
     mov     r1,#1                       @ set changeInterp
     b       common_gotoBail
 
+#if defined(WITH_SELF_VERIFICATION)
+/*
+ * Save PC and registers to shadow memory for self verification mode
+ * before jumping to native translation.
+ * On entry:
+ *    rPC, rFP, rGLUE: the values that they should contain
+ *    r10: the address of the target translation.
+ */
+jitSVShadowRunStart:
+    mov     r0,rPC                      @ r0<- program counter
+    mov     r1,rFP                      @ r1<- frame pointer
+    mov     r2,rGLUE                    @ r2<- InterpState pointer
+    mov     r3,r10                      @ r3<- target translation
+    bl      dvmSelfVerificationSaveState @ save registers to shadow space
+    ldr     rFP,[r0,#offShadowSpace_shadowFP] @ rFP<- fp in shadow space
+    add     rGLUE,r0,#offShadowSpace_interpState @ rGLUE<- rGLUE in shadow space
+    bx      r10                         @ jump to the translation
+
+/*
+ * Restore PC, registers, and interpState to original values
+ * before jumping back to the interpreter.
+ */
+jitSVShadowRunEnd:
+    mov    r1,rFP                        @ pass ending fp
+    bl     dvmSelfVerificationRestoreState @ restore pc and fp values
+    ldr    rPC,[r0,#offShadowSpace_startPC] @ restore PC
+    ldr    rFP,[r0,#offShadowSpace_fp]   @ restore FP
+    ldr    rGLUE,[r0,#offShadowSpace_glue] @ restore InterpState
+    ldr    r1,[r0,#offShadowSpace_svState] @ get self verification state
+    cmp    r1,#0                         @ check for punt condition
+    beq    1f
+    mov    r2,#kJitSelfVerification      @ ask for self verification
+    str    r2,[rGLUE,#offGlue_jitState]
+    mov    r2,#kInterpEntryInstr         @ normal entry reason
+    str    r2,[rGLUE,#offGlue_entryPoint]
+    mov    r1,#1                         @ set changeInterp
+    b      common_gotoBail
+
+1:                                       @ exit to interpreter without check
+    EXPORT_PC()
+    adrl   rIBASE, dvmAsmInstructionStart
+    FETCH_INST()
+    GET_INST_OPCODE(ip)
+    GOTO_OPCODE(ip)
+#endif
+
 #endif
 
 /*
@@ -9212,6 +9447,9 @@ common_backwardBranch:
 common_periodicChecks:
     ldr     r3, [rGLUE, #offGlue_pSelfSuspendCount] @ r3<- &suspendCount
 
+    @ speculatively store r0 before it is clobbered by dvmCheckSuspendPending
+    str     r0, [rGLUE, #offGlue_entryPoint]
+
 #if defined(WITH_DEBUGGER)
     ldr     r1, [rGLUE, #offGlue_pDebuggerActive]   @ r1<- &debuggerActive
 #endif
@@ -9246,13 +9484,24 @@ common_periodicChecks:
     bx      lr                          @ nothing to do, return
 
 2:  @ check suspend
+#if defined(WITH_JIT)
+    /*
+     * Refresh the Jit's cached copy of profile table pointer.  This pointer
+     * doubles as the Jit's on/off switch.
+     */
+    ldr     r3, [rGLUE, #offGlue_ppJitProfTable] @ r3<-&gDvmJit.pJitProfTable
     ldr     r0, [rGLUE, #offGlue_self]  @ r0<- glue->self
+    ldr     r3, [r3] @ r3 <- pJitProfTable
     EXPORT_PC()                         @ need for precise GC
+    str     r3, [rGLUE, #offGlue_pJitProfTable] @ refresh Jit's on/off switch
+#else
+    ldr     r0, [rGLUE, #offGlue_self]  @ r0<- glue->self
+    EXPORT_PC()                         @ need for precise GC
+#endif
     b       dvmCheckSuspendPending      @ suspend if necessary, then return
 
 3:  @ debugger/profiler enabled, bail out
     add     rPC, rPC, r9                @ update rPC
-    str     r0, [rGLUE, #offGlue_entryPoint]
     mov     r1, #1                      @ "want switch" = true
     b       common_gotoBail
 
@@ -9444,20 +9693,31 @@ dalvik_mterp:
     @ldr     pc, [r2, #offMethod_nativeFunc] @ pc<- methodToCall->nativeFunc
     LDR_PC_LR "[r2, #offMethod_nativeFunc]"
 
+#if defined(WITH_JIT)
+    ldr     r3, [rGLUE, #offGlue_ppJitProfTable] @ Refresh Jit's on/off status
+#endif
+
     @ native return; r9=self, r10=newSaveArea
     @ equivalent to dvmPopJniLocals
     ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved top
     ldr     r1, [r9, #offThread_exception] @ check for exception
+#if defined(WITH_JIT)
+    ldr     r3, [r3]                    @ r3 <- gDvmJit.pJitProfTable
+#endif
     str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
     cmp     r1, #0                      @ null?
     str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
+#if defined(WITH_JIT)
+    str     r3, [rGLUE, #offGlue_pJitProfTable] @ refresh cached on/off switch
+#endif
     bne     common_exceptionThrown      @ no, handle exception
 
     FETCH_ADVANCE_INST(3)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
-.LstackOverflow:
+.LstackOverflow:    @ r0=methodToCall
+    mov     r1, r0                      @ r1<- methodToCall
     ldr     r0, [rGLUE, #offGlue_self]  @ r0<- self
     bl      dvmHandleStackOverflow
     b       common_exceptionThrown
@@ -9521,12 +9781,13 @@ common_returnFromMethod:
     ldr     r1, [r10, #offClassObject_pDvmDex]   @ r1<- method->clazz->pDvmDex
     str     rFP, [r3, #offThread_curFrame]  @ self->curFrame = fp
 #if defined(WITH_JIT)
-    ldr     r3, [r0, #offStackSaveArea_returnAddr] @ r3 = saveArea->returnAddr
+    ldr     r10, [r0, #offStackSaveArea_returnAddr] @ r10 = saveArea->returnAddr
     GET_JIT_PROF_TABLE(r0)
     mov     rPC, r9                     @ publish new rPC
     str     r1, [rGLUE, #offGlue_methodClassDex]
-    cmp     r3, #0                      @ caller is compiled code
-    blxne   r3
+    str     r10, [r3, #offThread_inJitCodeCache]  @ may return to JIT'ed land
+    cmp     r10, #0                      @ caller is compiled code
+    blxne   r10
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
     cmp     r0,#0
     bne     common_updateProfile
@@ -9567,11 +9828,6 @@ common_exceptionThrown:
     mov     r9, #0
     bl      common_periodicChecks
 
-#if defined(WITH_JIT)
-    mov     r2,#kJitTSelectAbort        @ abandon trace selection in progress
-    str     r2,[rGLUE,#offGlue_jitState]
-#endif
-
     ldr     r10, [rGLUE, #offGlue_self] @ r10<- glue->self
     ldr     r9, [r10, #offThread_exception] @ r9<- self->exception
     mov     r1, r10                     @ r1<- self
@@ -9602,6 +9858,7 @@ common_exceptionThrown:
     beq     1f                          @ no, skip ahead
     mov     rFP, r0                     @ save relPc result in rFP
     mov     r0, r10                     @ r0<- self
+    mov     r1, r9                      @ r1<- exception
     bl      dvmCleanupStackOverflow     @ call(self)
     mov     r0, rFP                     @ restore result
 1:
@@ -9639,6 +9896,7 @@ common_exceptionThrown:
     ldrb    r1, [r10, #offThread_stackOverflowed]
     cmp     r1, #0                      @ did we overflow earlier?
     movne   r0, r10                     @ if yes: r0<- self
+    movne   r1, r9                      @ if yes: r1<- exception
     blne    dvmCleanupStackOverflow     @ if yes: call(self)
 
     @ may want to show "not caught locally" debug messages here
diff --git a/vm/mterp/out/InterpC-armv6-vfp.c b/vm/mterp/out/InterpC-armv6-vfp.c
index 2e091f579..17250065a 100644
--- a/vm/mterp/out/InterpC-armv6-vfp.c
+++ b/vm/mterp/out/InterpC-armv6-vfp.c
@@ -302,6 +302,11 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
 #define INST_INST(_inst)    ((_inst) & 0xff)
 
 /*
+ * Replace the opcode (used when handling breakpoints).  _opcode is a u1.
+ */
+#define INST_REPLACE_OP(_inst, _opcode) (((_inst) & 0xff00) | _opcode)
+
+/*
  * Extract the "vA, vB" 4-bit registers from the instruction word (_inst is u2).
  */
 #define INST_A(_inst)       (((_inst) >> 8) & 0x0f)
@@ -338,8 +343,7 @@ static inline void putDoubleToArray(u4* ptr, int idx, double dval)
 #if defined(WITH_JIT)
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
-        dvmJitDebuggerOrProfilerActive(interpState->jitState) :             \
-        !dvmJitDebuggerOrProfilerActive(interpState->jitState) )
+        dvmJitDebuggerOrProfilerActive() : !dvmJitDebuggerOrProfilerActive() )
 #else
 # define NEED_INTERP_SWITCH(_current) (                                     \
     (_current == INTERP_STD) ?                                              \
@@ -418,6 +422,10 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
 #define INTERP_TYPE INTERP_STD
 #define CHECK_DEBUG_AND_PROF() ((void)0)
 # define CHECK_TRACKED_REFS() ((void)0)
+#if defined(WITH_JIT)
+#define CHECK_JIT() (0)
+#define ABORT_JIT_TSELECT() ((void)0)
+#endif
 
 /*
  * In the C mterp stubs, "goto" is a function call followed immediately
@@ -536,7 +544,6 @@ static inline bool checkForNullExportPC(Object* obj, u4* fp, const u2* pc)
         }                                                                   \
     }
 
-
 /* File: c/opcommon.c */
 /* forward declarations of goto targets */
 GOTO_TARGET_DECL(filledNewArray, bool methodCallRange);
author	Ricardo Cerqueira <cyanogenmod@cerqueira.org>	2010-11-09 01:27:18 +0000
committer	Ricardo Cerqueira <cyanogenmod@cerqueira.org>	2010-11-09 01:27:18 +0000
commit	27ed151a61235e67a22629df8b66a01bc16e4502 (patch)
tree	acea55cdf3e34ef259a3b40351f484f786947806
parent	c8257fdc5455bd26cc20f12e6c6cdb9377fda20a (diff)
download	android_dalvik-27ed151a61235e67a22629df8b66a01bc16e4502.tar.gz android_dalvik-27ed151a61235e67a22629df8b66a01bc16e4502.tar.bz2 android_dalvik-27ed151a61235e67a22629df8b66a01bc16e4502.zip