author | Dong-Yuan Chen <dong-yuan.chen@intel.com> | 2012-07-03 13:13:07 -0700 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2012-07-20 12:27:18 -0700 |
commit | 0c2dc522d0e120f346cf0a40c8cf0c93346131c2 (patch) | |
tree | f7ca4c8e3ca1150b7e8c5f4b68c60972641dc77f /vm/compiler/codegen/x86/LowerAlu.cpp | |
parent | abede656a1f2330e3d31281fb208be7c04e8eb56 (diff) | |
download | android_dalvik-0c2dc522d0e120f346cf0a40c8cf0c93346131c2.tar.gz android_dalvik-0c2dc522d0e120f346cf0a40c8cf0c93346131c2.tar.bz2 android_dalvik-0c2dc522d0e120f346cf0a40c8cf0c93346131c2.zip |
[X86] X86 trace JIT compiler support
This patch provides a fully functional x86 trace JIT compiler for the
Dalvik VM. It is built on top of the existing x86 fast interpreter, with
bug fixes and the extensions needed to support the trace JIT interface.
The x86 trace JIT code generator was developed independently of the
existing template-based code generator and thus does not share exactly
the same infrastructure. Included in this patch are:
* Deprecated and removed the x86-atom fast interpreter, which has not
been functional since ICS.
* Augmented the x86 fast interpreter to provide interfaces for the x86
trace JIT compiler.
* Added an x86 trace JIT code generator with full JDWP debugging support.
* Method JIT and self-verification mode are not supported.
The x86 code generator uses the x86 instruction encoder/decoder library
from the Apache Harmony project. Additional wrapper extensions and bug
fixes were added to support the x86 trace JIT code generator. The x86
instruction encoder/decoder is embedded inside the x86 code generator
under the libenc subdirectory.
Change-Id: I241113681963a16c13a3562390813cbaaa6eedf0
Signed-off-by: Dong-Yuan Chen <dong-yuan.chen@intel.com>
Signed-off-by: Yixin Shou <yixin.shou@intel.com>
Signed-off-by: Johnnie Birch <johnnie.l.birch.jr@intel.com>
Signed-off-by: Udayan <udayan.banerji@intel.com>
Signed-off-by: Sushma Kyasaralli Thimmappa <sushma.kyasaralli.thimmappa@intel.com>
Signed-off-by: Bijoy Jose <bijoy.a.jose@intel.com>
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
Signed-off-by: Tim Hartley <timothy.d.hartley@intel.com>
Diffstat (limited to 'vm/compiler/codegen/x86/LowerAlu.cpp')
-rw-r--r-- | vm/compiler/codegen/x86/LowerAlu.cpp | 1962 |
1 file changed, 1962 insertions, 0 deletions
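The new file below lowers Dalvik ALU bytecodes to x86. One detail worth noting before the divide/remainder helpers (`common_div_rem_int`, `common_div_rem_int_lit`): x86 `idiv` raises a divide-error fault for INT_MIN / -1, whereas Dalvik, like Java, defines that quotient as INT_MIN (0x80000000) and the remainder as 0, so the emitted code branches around `idiv` for that operand pair (and jumps to `common_errDivideByZero` for a zero divisor). The standalone C++ sketch below only illustrates the intended semantics; the `dalvikDiv`/`dalvikRem` helpers and test values are illustrative and not part of the patch.

```cpp
#include <climits>
#include <cstdio>

// Illustrative only: mirrors the special-case guard that the lowering code
// emits before x86 idiv. INT_MIN / -1 overflows (idiv would fault), so
// Dalvik semantics pin the quotient to INT_MIN and the remainder to 0.
// A zero divisor is assumed to have been rejected already (the generated
// code jumps to common_errDivideByZero for that case).
static int dalvikDiv(int num, int den) {
    if (num == INT_MIN && den == -1) return INT_MIN;  // 0x80000000
    return num / den;                                 // plain idiv otherwise
}

static int dalvikRem(int num, int den) {
    if (num == INT_MIN && den == -1) return 0;
    return num % den;
}

int main() {
    std::printf("%d %d\n", dalvikDiv(INT_MIN, -1), dalvikRem(INT_MIN, -1));
    std::printf("%d %d\n", dalvikDiv(7, -2), dalvikRem(7, -2));
    return 0;
}
```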
diff --git a/vm/compiler/codegen/x86/LowerAlu.cpp b/vm/compiler/codegen/x86/LowerAlu.cpp new file mode 100644 index 000000000..2231baca1 --- /dev/null +++ b/vm/compiler/codegen/x86/LowerAlu.cpp @@ -0,0 +1,1962 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file LowerAlu.cpp + \brief This file lowers ALU bytecodes. +*/ +#include "libdex/DexOpcodes.h" +#include "libdex/DexFile.h" +#include "Lower.h" +#include "NcgAot.h" +#include "enc_wrapper.h" + +///////////////////////////////////////////// +#define P_GPR_1 PhysicalReg_EBX +//! lower bytecode NEG_INT + +//! +int op_neg_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_unary_reg(OpndSize_32, neg_opc, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +//! lower bytecode NOT_INT + +//! +int op_not_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_unary_reg(OpndSize_32, not_opc, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +#undef P_GPR_1 +//! lower bytecode NEG_LONG + +//! This implementation uses XMM registers +int op_neg_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_64, 1, false); + alu_binary_reg_reg(OpndSize_64, xor_opc, 2, false, 2, false); + alu_binary_reg_reg(OpndSize_64, sub_opc, 1, false, 2, false); + set_virtual_reg(vA, OpndSize_64, 2, false); + rPC += 1; + return 0; +} +//! lower bytecode NOT_LONG + +//! This implementation uses XMM registers +int op_not_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_64, 1, false); + load_global_data_API("64bits", OpndSize_64, 2, false); + alu_binary_reg_reg(OpndSize_64, andn_opc, 2, false, 1, false); + set_virtual_reg(vA, OpndSize_64, 1, false); + rPC += 1; + return 0; +} +#define P_GPR_1 PhysicalReg_EBX +//! lower bytecode NEG_FLOAT + +//! This implementation uses GPR +int op_neg_float() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, add_opc, 0x80000000, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +#undef P_GPR_1 + +//! lower bytecode NEG_DOUBLE + +//! This implementation uses XMM registers +int op_neg_double() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_64, 1, false); + load_global_data_API("doubNeg", OpndSize_64, 2, false); + alu_binary_reg_reg(OpndSize_64, xor_opc, 1, false, 2, false); + set_virtual_reg(vA, OpndSize_64, 2, false); + rPC += 1; + return 0; +} + +//! lower bytecode INT_TO_LONG + +//! 
It uses native instruction cdq +int op_int_to_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true); + convert_integer(OpndSize_32, OpndSize_64); + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + set_virtual_reg(vA+1, OpndSize_32, PhysicalReg_EDX, true); + rPC += 1; + return 0; +} +//! lower bytecode INT_TO_FLOAT + +//! This implementation uses FP stack +int op_int_to_float() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_int_fp_stack_VR(OpndSize_32, vB); //fildl + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + rPC += 1; + return 0; +} +//! lower bytecode INT_TO_DOUBLE + +//! This implementation uses FP stack +int op_int_to_double() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_int_fp_stack_VR(OpndSize_32, vB); //fildl + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + rPC += 1; + return 0; +} +//! lower bytecode LONG_TO_FLOAT + +//! This implementation uses FP stack +int op_long_to_float() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_int_fp_stack_VR(OpndSize_64, vB); //fildll + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + rPC += 1; + return 0; +} +//! lower bytecode LONG_TO_DOUBLE + +//! This implementation uses FP stack +int op_long_to_double() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_int_fp_stack_VR(OpndSize_64, vB); //fildll + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + rPC += 1; + return 0; +} +//! lower bytecode FLOAT_TO_DOUBLE + +//! This implementation uses FP stack +int op_float_to_double() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_fp_stack_VR(OpndSize_32, vB); //flds + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + rPC += 1; + return 0; +} +//! lower bytecode DOUBLE_TO_FLOAT + +//! This implementation uses FP stack +int op_double_to_float() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_fp_stack_VR(OpndSize_64, vB); //fldl + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + rPC += 1; + return 0; +} +#define P_GPR_1 PhysicalReg_EBX +//! lower bytecode LONG_TO_INT + +//! This implementation uses GPR +int op_long_to_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +#undef P_GPR_1 + +//! common code to convert a float or double to integer + +//! 
It uses FP stack +int common_fp_to_int(bool isDouble, u2 vA, u2 vB) { + if(isDouble) { + load_fp_stack_VR(OpndSize_64, vB); //fldl + } + else { + load_fp_stack_VR(OpndSize_32, vB); //flds + } + + load_fp_stack_global_data_API("intMax", OpndSize_32); + load_fp_stack_global_data_API("intMin", OpndSize_32); + + //ST(0) ST(1) ST(2) --> LintMin LintMax value + compare_fp_stack(true, 2, false/*isDouble*/); //ST(2) + //ST(0) ST(1) --> LintMax value + conditional_jump(Condition_AE, ".float_to_int_negInf", true); + rememberState(1); + compare_fp_stack(true, 1, false/*isDouble*/); //ST(1) + //ST(0) --> value + rememberState(2); + conditional_jump(Condition_C, ".float_to_int_nanInf", true); + //fnstcw, orw, fldcw, xorw + load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true); + alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true); + load_fpu_cw(0, PhysicalReg_ESP, true); + alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true); + store_int_fp_stack_VR(true/*pop*/, OpndSize_32, vA); //fistpl + //fldcw + load_fpu_cw(0, PhysicalReg_ESP, true); + load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + rememberState(3); + unconditional_jump(".float_to_int_okay", true); + insertLabel(".float_to_int_nanInf", true); + conditional_jump(Condition_NP, ".float_to_int_posInf", true); + //fstps CHECK + goToState(2); + store_fp_stack_VR(true, OpndSize_32, vA); + set_VR_to_imm(vA, OpndSize_32, 0); + transferToState(3); + unconditional_jump(".float_to_int_okay", true); + insertLabel(".float_to_int_posInf", true); + //fstps CHECK + goToState(2); + store_fp_stack_VR(true, OpndSize_32, vA); + set_VR_to_imm(vA, OpndSize_32, 0x7fffffff); + transferToState(3); + unconditional_jump(".float_to_int_okay", true); + insertLabel(".float_to_int_negInf", true); + goToState(1); + //fstps CHECK + store_fp_stack_VR(true, OpndSize_32, vA); + store_fp_stack_VR(true, OpndSize_32, vA); + set_VR_to_imm(vA, OpndSize_32, 0x80000000); + transferToState(3); + insertLabel(".float_to_int_okay", true); + return 0; +} +//! lower bytecode FLOAT_TO_INT by calling common_fp_to_int + +//! +int op_float_to_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + int retval = common_fp_to_int(false, vA, vB); + rPC += 1; + return retval; +} +//! lower bytecode DOUBLE_TO_INT by calling common_fp_to_int + +//! +int op_double_to_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + int retval = common_fp_to_int(true, vA, vB); + rPC += 1; + return retval; +} + +//! common code to convert float or double to long + +//! 
It uses FP stack +int common_fp_to_long(bool isDouble, u2 vA, u2 vB) { + if(isDouble) { + load_fp_stack_VR(OpndSize_64, vB); //fldl + } + else { + load_fp_stack_VR(OpndSize_32, vB); //flds + } + + load_fp_stack_global_data_API("valuePosInfLong", OpndSize_64); + load_fp_stack_global_data_API("valueNegInfLong", OpndSize_64); + + //ST(0) ST(1) ST(2) --> LintMin LintMax value + compare_fp_stack(true, 2, false/*isDouble*/); //ST(2) + //ST(0) ST(1) --> LintMax value + conditional_jump(Condition_AE, ".float_to_long_negInf", true); + rememberState(1); + compare_fp_stack(true, 1, false/*isDouble*/); //ST(1) + rememberState(2); + //ST(0) --> value + conditional_jump(Condition_C, ".float_to_long_nanInf", true); + //fnstcw, orw, fldcw, xorw + load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true); + alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true); + load_fpu_cw(0, PhysicalReg_ESP, true); + alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true); + store_int_fp_stack_VR(true/*pop*/, OpndSize_64, vA); //fistpll + //fldcw + load_fpu_cw(0, PhysicalReg_ESP, true); + load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + rememberState(3); + unconditional_jump(".float_to_long_okay", true); + insertLabel(".float_to_long_nanInf", true); + conditional_jump(Condition_NP, ".float_to_long_posInf", true); + //fstpl?? + goToState(2); + + load_global_data_API("valueNanLong", OpndSize_64, 1, false); + + set_virtual_reg(vA, OpndSize_64, 1, false); + transferToState(3); + unconditional_jump(".float_to_long_okay", true); + insertLabel(".float_to_long_posInf", true); + //fstpl + goToState(2); + + load_global_data_API("valuePosInfLong", OpndSize_64, 2, false); + set_virtual_reg(vA, OpndSize_64, 2, false); + transferToState(3); + unconditional_jump(".float_to_long_okay", true); + insertLabel(".float_to_long_negInf", true); + //fstpl + //fstpl + goToState(1); + + load_global_data_API("valueNegInfLong", OpndSize_64, 3, false); + set_virtual_reg(vA, OpndSize_64, 3, false); + transferToState(3); + insertLabel(".float_to_long_okay", true); + return 0; +} +//! lower bytecode FLOAT_TO_LONG by calling common_fp_to_long + +//! +int op_float_to_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + int retval = common_fp_to_long(false, vA, vB); + rPC += 1; + return retval; +} +//! lower bytecode DOUBLE_TO_LONG by calling common_fp_to_long + +//! +int op_double_to_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + int retval = common_fp_to_long(true, vA, vB); + rPC += 1; + return retval; +} +#define P_GPR_1 PhysicalReg_EBX +//! lower bytecode INT_TO_BYTE + +//! It uses GPR +int op_int_to_byte() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, sal_opc, 24, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, 24, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +//! lower bytecode INT_TO_CHAR + +//! It uses GPR +int op_int_to_char() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false); + alu_binary_imm_reg(OpndSize_32, shr_opc, 16, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +//! lower bytecode INT_TO_SHORT + +//! 
It uses GPR +int op_int_to_short() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, 16, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +//! common code to handle integer ALU ops + +//! It uses GPR +int common_alu_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem + get_virtual_reg(v1, OpndSize_32, 1, false); + //in encoder, reg is first operand, which is the destination + //gpr_1 op v2(rFP) --> gpr_1 + //shift only works with reg cl, v2 should be in %ecx + alu_binary_VR_reg(OpndSize_32, opc, v2, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 0; +} +#undef P_GPR_1 +#define P_GPR_1 PhysicalReg_EBX +//! common code to handle integer shift ops + +//! It uses GPR +int common_shift_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v2, OpndSize_32, PhysicalReg_ECX, true); + get_virtual_reg(v1, OpndSize_32, 1, false); + //in encoder, reg2 is first operand, which is the destination + //gpr_1 op v2(rFP) --> gpr_1 + //shift only works with reg cl, v2 should be in %ecx + alu_binary_reg_reg(OpndSize_32, opc, PhysicalReg_ECX, true, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 0; +} +#undef p_GPR_1 +//! lower bytecode ADD_INT by calling common_alu_int + +//! +int op_add_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(add_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SUB_INT by calling common_alu_int + +//! +int op_sub_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(sub_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode MUL_INT by calling common_alu_int + +//! +int op_mul_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(imul_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode AND_INT by calling common_alu_int + +//! +int op_and_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(and_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode OR_INT by calling common_alu_int + +//! +int op_or_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(or_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode XOR_INT by calling common_alu_int + +//! +int op_xor_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(xor_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SHL_INT by calling common_shift_int + +//! +int op_shl_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_shift_int(shl_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SHR_INT by calling common_shift_int + +//! +int op_shr_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_shift_int(sar_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode USHR_INT by calling common_shift_int + +//! 
+int op_ushr_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_shift_int(shr_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode ADD_INT_2ADDR by calling common_alu_int + +//! +int op_add_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(add_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SUB_INT_2ADDR by calling common_alu_int + +//! +int op_sub_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(sub_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode MUL_INT_2ADDR by calling common_alu_int + +//! +int op_mul_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(imul_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode AND_INT_2ADDR by calling common_alu_int + +//! +int op_and_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(and_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode OR_INT_2ADDR by calling common_alu_int + +//! +int op_or_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(or_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode XOR_INT_2ADDR by calling common_alu_int + +//! +int op_xor_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(xor_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SHL_INT_2ADDR by calling common_shift_int + +//! +int op_shl_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_shift_int(shl_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SHR_INT_2ADDR by calling common_shift_int + +//! +int op_shr_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_shift_int(sar_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode USHR_INT_2ADDR by calling common_shift_int + +//! +int op_ushr_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_shift_int(shr_opc, vA, v1, v2); + rPC += 1; + return retval; +} +#define P_GPR_1 PhysicalReg_EBX +//!common code to handle integer DIV & REM, it used GPR + +//!The special case: when op0 == minint && op1 == -1, return 0 for isRem, return 0x80000000 for isDiv +//!There are two merge points in the control flow for this bytecode +//!make sure the reg. alloc. 
state is the same at merge points by calling transferToState +int common_div_rem_int(bool isRem, u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v1, OpndSize_32, PhysicalReg_EAX, true); + get_virtual_reg(v2, OpndSize_32, 2, false); + compare_imm_reg(OpndSize_32, 0, 2, false); + handlePotentialException( + Condition_E, Condition_NE, + 1, "common_errDivideByZero"); + /////////////////// handle special cases + //conditional move 0 to $edx for rem for the two special cases + //conditional move 0x80000000 to $eax for div + //handle -1 special case divide error + compare_imm_reg(OpndSize_32, -1, 2, false); + conditional_jump(Condition_NE, ".common_div_rem_int_normal", true); + //handle min int special case divide error + rememberState(1); + compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true); + transferToState(1); + conditional_jump(Condition_E, ".common_div_rem_int_special", true); + + insertLabel(".common_div_rem_int_normal", true); //merge point + convert_integer(OpndSize_32, OpndSize_64); //cdq + //idiv: dividend in edx:eax; quotient in eax; remainder in edx + alu_unary_reg(OpndSize_32, idiv_opc, 2, false); + if(isRem) + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true); + else //divide: quotient in %eax + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + rememberState(2); + unconditional_jump(".common_div_rem_int_okay", true); + + insertLabel(".common_div_rem_int_special", true); + goToState(1); + if(isRem) + set_VR_to_imm(vA, OpndSize_32, 0); + else + set_VR_to_imm(vA, OpndSize_32, 0x80000000); + transferToState(2); + insertLabel(".common_div_rem_int_okay", true); //merge point 2 + return 0; +} +#undef P_GPR_1 +//! lower bytecode DIV_INT by calling common_div_rem_int + +//! +int op_div_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_div_rem_int(false, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode REM_INT by calling common_div_rem_int + +//! +int op_rem_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_div_rem_int(true, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode DIV_INT_2ADDR by calling common_div_rem_int + +//! +int op_div_int_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_div_rem_int(false, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode REM_INT_2ADDR by calling common_div_rem_int + +//! +int op_rem_int_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_div_rem_int(true, vA, v1, v2); + rPC += 1; + return retval; +} + +#define P_GPR_1 PhysicalReg_EBX +//! common code to handle integer ALU ops with literal + +//! It uses GPR +int common_alu_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) { //except div and rem + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, opc, imm, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 0; +} +//! calls common_alu_int_lit +int common_shift_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) { + return common_alu_int_lit(opc, vA, vB, imm); +} +#undef p_GPR_1 +//! lower bytecode ADD_INT_LIT16 by calling common_alu_int_lit + +//! 
+int op_add_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(add_opc, vA, vB, tmp); + rPC += 2; + return retval; +} + +int alu_rsub_int(ALU_Opcode opc, u2 vA, s2 imm, u2 vB) { + move_imm_to_reg(OpndSize_32, imm, 2, false); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_reg_reg(OpndSize_32, opc, 1, false, 2, false); + set_virtual_reg(vA, OpndSize_32, 2, false); + return 0; +} + + +//! lower bytecode RSUB_INT by calling common_alu_int_lit + +//! +int op_rsub_int() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = alu_rsub_int(sub_opc, vA, tmp, vB); + rPC += 2; + return retval; +} +//! lower bytecode MUL_INT_LIT16 by calling common_alu_int_lit + +//! +int op_mul_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(imul_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode AND_INT_LIT16 by calling common_alu_int_lit + +//! +int op_and_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(and_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode OR_INT_LIT16 by calling common_alu_int_lit + +//! +int op_or_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(or_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode XOR_INT_LIT16 by calling common_alu_int_lit + +//! +int op_xor_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(xor_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode SHL_INT_LIT16 by calling common_shift_int_lit + +//! +int op_shl_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_shift_int_lit(shl_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode SHR_INT_LIT16 by calling common_shift_int_lit + +//! +int op_shr_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_shift_int_lit(sar_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode USHR_INT_LIT16 by calling common_shift_int_lit + +//! +int op_ushr_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_shift_int_lit(shr_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode ADD_INT_LIT8 by calling common_alu_int_lit + +//! +int op_add_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(add_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode RSUB_INT_LIT8 by calling common_alu_int_lit + +//! +int op_rsub_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = alu_rsub_int(sub_opc, vA, tmp, vB); + rPC += 2; + return retval; +} +//! lower bytecode MUL_INT_LIT8 by calling common_alu_int_lit + +//! +int op_mul_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(imul_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode AND_INT_LIT8 by calling common_alu_int_lit + +//! 
+int op_and_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(and_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode OR_INT_LIT8 by calling common_alu_int_lit + +//! +int op_or_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(or_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode XOR_INT_LIT8 by calling common_alu_int_lit + +//! +int op_xor_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(xor_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode SHL_INT_LIT8 by calling common_shift_int_lit + +//! +int op_shl_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_shift_int_lit(shl_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode SHR_INT_LIT8 by calling common_shift_int_lit + +//! +int op_shr_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_shift_int_lit(sar_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode USHR_INT_LIT8 by calling common_shift_int_lit + +//! +int op_ushr_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_shift_int_lit(shr_opc, vA, vB, tmp); + rPC += 2; + return retval; +} + +int isPowerOfTwo(int imm) { + int i; + for(i = 1; i < 17; i++) { + if(imm == (1 << i)) return i; + } + return -1; +} + +#define P_GPR_1 PhysicalReg_EBX +int div_lit_strength_reduction(u2 vA, u2 vB, s2 imm) { + if(gDvm.executionMode == kExecutionModeNcgO1) { + //strength reduction for div by 2,4,8,... + int power = isPowerOfTwo(imm); + if(power < 1) return 0; + //tmp2 is not updated, so it can share with vB + get_virtual_reg(vB, OpndSize_32, 2, false); + //if imm is 2, power will be 1 + if(power == 1) { + /* mov tmp1, tmp2 + shrl $31, tmp1 + addl tmp2, tmp1 + sarl $1, tmp1 */ + move_reg_to_reg(OpndSize_32, 2, false, 1, false); + alu_binary_imm_reg(OpndSize_32, shr_opc, 31, 1, false); + alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, 1, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 1; + } + //power > 1 + /* mov tmp1, tmp2 + sarl $power-1, tmp1 + shrl 32-$power, tmp1 + addl tmp2, tmp1 + sarl $power, tmp1 */ + move_reg_to_reg(OpndSize_32, 2, false, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, power-1, 1, false); + alu_binary_imm_reg(OpndSize_32, shr_opc, 32-power, 1, false); + alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, power, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 1; + } + return 0; +} + +////////// throws exception!!! +//! common code to handle integer DIV & REM with literal + +//! 
It uses GPR +int common_div_rem_int_lit(bool isRem, u2 vA, u2 vB, s2 imm) { + if(!isRem) { + int retCode = div_lit_strength_reduction(vA, vB, imm); + if(retCode > 0) return 0; + } + if(imm == 0) { + export_pc(); //use %edx +#ifdef DEBUG_EXCEPTION + LOGI("EXTRA code to handle exception"); +#endif + constVREndOfBB(); + beforeCall("exception"); //dump GG, GL VRs + unconditional_jump_global_API( + "common_errDivideByZero", false); + + return 0; + } + get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true); + //check against -1 for DIV_INT?? + if(imm == -1) { + compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true); + conditional_jump(Condition_E, ".div_rem_int_lit_special", true); + rememberState(1); + } + move_imm_to_reg(OpndSize_32, imm, 2, false); + convert_integer(OpndSize_32, OpndSize_64); //cdq + //idiv: dividend in edx:eax; quotient in eax; remainder in edx + alu_unary_reg(OpndSize_32, idiv_opc, 2, false); + if(isRem) + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true); + else + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + + if(imm == -1) { + unconditional_jump(".div_rem_int_lit_okay", true); + rememberState(2); + insertLabel(".div_rem_int_lit_special", true); + goToState(1); + if(isRem) + set_VR_to_imm(vA, OpndSize_32, 0); + else + set_VR_to_imm(vA, OpndSize_32, 0x80000000); + transferToState(2); + } + + insertLabel(".div_rem_int_lit_okay", true); //merge point 2 + return 0; +} +#undef P_GPR_1 +//! lower bytecode DIV_INT_LIT16 by calling common_div_rem_int_lit + +//! +int op_div_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_div_rem_int_lit(false, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode REM_INT_LIT16 by calling common_div_rem_int_lit + +//! +int op_rem_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_div_rem_int_lit(true, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode DIV_INT_LIT8 by calling common_div_rem_int_lit + +//! +int op_div_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_div_rem_int_lit(false, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode REM_INT_LIT8 by calling common_div_rem_int_lit + +//! +int op_rem_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_div_rem_int_lit(true, vA, vB, tmp); + rPC += 2; + return retval; +} +//! common code to hanle long ALU ops + +//! It uses XMM +int common_alu_long(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem + get_virtual_reg(v1, OpndSize_64, 1, false); + get_virtual_reg(v2, OpndSize_64, 2, false); + alu_binary_reg_reg(OpndSize_64, opc, 2, false, 1, false); + set_virtual_reg(vA, OpndSize_64, 1, false); + return 0; +} +//! lower bytecode ADD_LONG by calling common_alu_long + +//! +int op_add_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(add_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SUB_LONG by calling common_alu_long + +//! +int op_sub_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(sub_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode AND_LONG by calling common_alu_long + +//! 
+int op_and_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(and_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode OR_LONG by calling common_alu_long + +//! +int op_or_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(or_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode XOR_LONG by calling common_alu_long + +//! +int op_xor_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(xor_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode ADD_LONG_2ADDR by calling common_alu_long + +//! +int op_add_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(add_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SUB_LONG_2ADDR by calling common_alu_long + +//! +int op_sub_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(sub_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode AND_LONG_2ADDR by calling common_alu_long + +//! +int op_and_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(and_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode OR_LONG_2ADDR by calling common_alu_long + +//! +int op_or_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(or_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode XOR_LONG_2ADDR by calling common_alu_long + +//! +int op_xor_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(xor_opc, vA, v1, v2); + rPC += 1; + return retval; +} + +//signed vs unsigned imul and mul? +#define P_GPR_1 PhysicalReg_EBX +#define P_GPR_2 PhysicalReg_ECX +#define P_GPR_3 PhysicalReg_ESI +//! common code to handle multiplication of long + +//! It uses GPR +int common_mul_long(u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v2, OpndSize_32, 1, false); + move_reg_to_reg(OpndSize_32, 1, false, PhysicalReg_EAX, true); + //imul: 2L * 1H update temporary 1 + alu_binary_VR_reg(OpndSize_32, imul_opc, (v1+1), 1, false); + get_virtual_reg(v1, OpndSize_32, 3, false); + move_reg_to_reg(OpndSize_32, 3, false, 2, false); + //imul: 1L * 2H + alu_binary_VR_reg(OpndSize_32, imul_opc, (v2+1), 2, false); + alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false); + alu_unary_reg(OpndSize_32, mul_opc, 3, false); + alu_binary_reg_reg(OpndSize_32, add_opc, PhysicalReg_EDX, true, 1, false); + set_virtual_reg(vA+1, OpndSize_32, 1, false); + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + return 0; +} +#undef P_GPR_1 +#undef P_GPR_2 +#undef P_GPR_3 +//! lower bytecode MUL_LONG by calling common_mul_long + +//! +int op_mul_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_mul_long(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode MUL_LONG_2ADDR by calling common_mul_long + +//! +int op_mul_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_mul_long(vA, v1, v2); + rPC += 1; + return retval; +} + +#define P_GPR_1 PhysicalReg_EBX +#define P_GPR_2 PhysicalReg_ECX +//! common code to handle DIV & REM of long + +//! 
It uses GPR & XMM; and calls call_moddi3 & call_divdi3 +int common_div_rem_long(bool isRem, u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v2, OpndSize_32, 1, false); + get_virtual_reg(v2+1, OpndSize_32, 2, false); + //save to native stack before changing register P_GPR_1 + load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_32, 1, false, 8, PhysicalReg_ESP, true); + alu_binary_reg_reg(OpndSize_32, or_opc, 2, false, 1, false); + + handlePotentialException( + Condition_E, Condition_NE, + 1, "common_errDivideByZero"); + move_reg_to_mem(OpndSize_32, 2, false, 12, PhysicalReg_ESP, true); + get_virtual_reg(v1, OpndSize_64, 1, false); + move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true); + scratchRegs[0] = PhysicalReg_SCRATCH_1; + nextVersionOfHardReg(PhysicalReg_EDX, 2); //next version has 2 refs + if(isRem) + call_moddi3(); + else + call_divdi3(); + load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + set_virtual_reg(vA+1, OpndSize_32,PhysicalReg_EDX, true); + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + return 0; +} +#undef P_GPR_1 +#undef P_GPR_2 +//! lower bytecode DIV_LONG by calling common_div_rem_long + +//! +int op_div_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_div_rem_long(false, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode REM_LONG by calling common_div_rem_long + +//! +int op_rem_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_div_rem_long(true, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode DIV_LONG_2ADDR by calling common_div_rem_long + +//! +int op_div_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_div_rem_long(false, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode REM_LONG_2ADDR by calling common_div_rem_long + +//! +int op_rem_long_2addr() { //call __moddi3 instead of __divdi3 + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_div_rem_long(true, vA, v1, v2); + rPC += 1; + return retval; +} + +//! common code to handle SHL long + +//! It uses XMM +int common_shl_long(u2 vA, u2 v1, u2 v2) { + get_VR_ss(v2, 2, false); + + load_global_data_API("shiftMask", OpndSize_64, 3, false); + + get_virtual_reg(v1, OpndSize_64, 1, false); + alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); + alu_binary_reg_reg(OpndSize_64, sll_opc, 2, false, 1, false); + set_virtual_reg(vA, OpndSize_64, 1, false); + return 0; +} + +//! common code to handle SHR long + +//! 
It uses XMM +int common_shr_long(u2 vA, u2 v1, u2 v2) { + get_VR_ss(v2, 2, false); + + load_global_data_API("shiftMask", OpndSize_64, 3, false); + + get_virtual_reg(v1, OpndSize_64, 1, false); + alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); + alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false); + compare_imm_VR(OpndSize_32, 0, (v1+1)); + conditional_jump(Condition_GE, ".common_shr_long_special", true); + rememberState(1); + + load_global_data_API("value64", OpndSize_64, 4, false); + + alu_binary_reg_reg(OpndSize_64, sub_opc, 2, false, 4, false); + + load_global_data_API("64bits", OpndSize_64, 5, false); + + alu_binary_reg_reg(OpndSize_64, sll_opc, 4, false, 5, false); + alu_binary_reg_reg(OpndSize_64, or_opc, 5, false, 1, false); + rememberState(2); + //check whether the target is next instruction TODO + unconditional_jump(".common_shr_long_done", true); + + insertLabel(".common_shr_long_special", true); + goToState(1); + transferToState(2); + insertLabel(".common_shr_long_done", true); + set_virtual_reg(vA, OpndSize_64, 1, false); + return 0; +} + +//! common code to handle USHR long + +//! It uses XMM +int common_ushr_long(u2 vA, u2 v1, u2 v2) { + get_VR_sd(v1, 1, false); + get_VR_ss(v2, 2, false); + + load_sd_global_data_API("shiftMask", 3, false); + + alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); + alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false); + set_VR_sd(vA, 1, false); + return 0; +} +//! lower bytecode SHL_LONG by calling common_shl_long + +//! +int op_shl_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_shl_long(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SHL_LONG_2ADDR by calling common_shl_long + +//! +int op_shl_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_shl_long(vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SHR_LONG by calling common_shr_long + +//! +int op_shr_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_shr_long(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SHR_LONG_2ADDR by calling common_shr_long + +//! +int op_shr_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_shr_long(vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode USHR_LONG by calling common_ushr_long + +//! +int op_ushr_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_ushr_long(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode USHR_LONG_2ADDR by calling common_ushr_long + +//! +int op_ushr_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_ushr_long(vA, v1, v2); + rPC += 1; + return retval; +} +#define USE_MEM_OPERAND +/////////////////////////////////////////// +//! common code to handle ALU of floats + +//! It uses XMM +int common_alu_float(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul + get_VR_ss(v1, 1, false); +#ifdef USE_MEM_OPERAND + alu_sd_binary_VR_reg(opc, v2, 1, false, false/*isSD*/); +#else + get_VR_ss(v2, 2, false); + alu_ss_binary_reg_reg(opc, 2, false, 1, false); +#endif + set_VR_ss(vA, 1, false); + return 0; +} +//! lower bytecode ADD_FLOAT by calling common_alu_float + +//! 
+int op_add_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_float(add_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SUB_FLOAT by calling common_alu_float + +//! +int op_sub_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_float(sub_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode MUL_FLOAT by calling common_alu_float + +//! +int op_mul_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_float(mul_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode ADD_FLOAT_2ADDR by calling common_alu_float + +//! +int op_add_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_float(add_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SUB_FLOAT_2ADDR by calling common_alu_float + +//! +int op_sub_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_float(sub_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode MUL_FLOAT_2ADDR by calling common_alu_float + +//! +int op_mul_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_float(mul_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! common code to handle DIV of float + +//! It uses FP stack +int common_div_float(u2 vA, u2 v1, u2 v2) { + load_fp_stack_VR(OpndSize_32, v1); //flds + fpu_VR(div_opc, OpndSize_32, v2); + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + return 0; +} +//! lower bytecode DIV_FLOAT by calling common_div_float + +//! +int op_div_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_float(div_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode DIV_FLOAT_2ADDR by calling common_div_float + +//! +int op_div_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_float(div_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! common code to handle DIV of double + +//! It uses XMM +int common_alu_double(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul + get_VR_sd(v1, 1, false); +#ifdef USE_MEM_OPERAND + alu_sd_binary_VR_reg(opc, v2, 1, false, true /*isSD*/); +#else + get_VR_sd(v2, 2, false); + alu_sd_binary_reg_reg(opc, 2, false, 1, false); +#endif + set_VR_sd(vA, 1, false); + return 0; +} +//! lower bytecode ADD_DOUBLE by calling common_alu_double + +//! +int op_add_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_double(add_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SUB_DOUBLE by calling common_alu_double + +//! +int op_sub_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_double(sub_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode MUL_DOUBLE by calling common_alu_double + +//! +int op_mul_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_double(mul_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode ADD_DOUBLE_2ADDR by calling common_alu_double + +//! 
+int op_add_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_double(add_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SUB_DOUBLE_2ADDR by calling common_alu_double + +//! +int op_sub_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_double(sub_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode MUL_DOUBLE_2ADDR by calling common_alu_double + +//! +int op_mul_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_double(mul_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! common code to handle DIV of double + +//! It uses FP stack +int common_div_double(u2 vA, u2 v1, u2 v2) { + load_fp_stack_VR(OpndSize_64, v1); //fldl + fpu_VR(div_opc, OpndSize_64, v2); //fdivl + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + return 0; +} +//! lower bytecode DIV_DOUBLE by calling common_div_double + +//! +int op_div_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_double(div_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode DIV_DOUBLE_2ADDR by calling common_div_double + +//! +int op_div_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_double(div_opc, vA, v1, v2); + rPC += 1; + return retval; +} +#define P_GPR_1 PhysicalReg_EBX +#define P_GPR_2 PhysicalReg_ECX +//! common code to handle REM of float + +//! It uses GPR & calls call_fmodf +int common_rem_float(u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v1, OpndSize_32, 1, false); + get_virtual_reg(v2, OpndSize_32, 2, false); + load_effective_addr(-8, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_32, 1, false, 0, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_32, 2, false, 4, PhysicalReg_ESP, true); + scratchRegs[0] = PhysicalReg_SCRATCH_1; + call_fmodf(); //(float x, float y) return float + load_effective_addr(8, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + return 0; +} +#undef P_GPR_1 +#undef P_GPR_2 +//! lower bytecode REM_FLOAT by calling common_rem_float + +//! +int op_rem_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_rem_float(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode REM_FLOAT_2ADDR by calling common_rem_float + +//! +int op_rem_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_rem_float(vA, v1, v2); + rPC += 1; + return retval; +} +//! common code to handle REM of double + +//! It uses XMM & calls call_fmod +int common_rem_double(u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v1, OpndSize_64, 1, false); + get_virtual_reg(v2, OpndSize_64, 2, false); + load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_64, 2, false, 8, PhysicalReg_ESP, true); + scratchRegs[0] = PhysicalReg_SCRATCH_1; + call_fmod(); //(long double x, long double y) return double + load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + return 0; +} +//! lower bytecode REM_DOUBLE by calling common_rem_double + +//! 
+int op_rem_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_rem_double(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode REM_DOUBLE_2ADDR by calling common_rem_double + +//! +int op_rem_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_rem_double(vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode CMPL_FLOAT + +//! +int op_cmpl_float() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_VR_ss(v1, 1, false); //xmm + move_imm_to_reg(OpndSize_32, 0, 1, false); + move_imm_to_reg(OpndSize_32, 1, 2, false); + move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); + compare_VR_ss_reg(v2, 1, false); + //default: 0xffffffff?? + move_imm_to_reg(OpndSize_32, + 0xffffffff, 4, false); + //ORDER of cmov matters !!! (Z,P,A) + //finalNaN: unordered 0xffffffff + conditional_move_reg_to_reg(OpndSize_32, Condition_Z, + 1, false, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_P, + 3, false, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, + 2, false, 4, false); + set_virtual_reg(vA, OpndSize_32, 4, false); + rPC += 2; + return 0; +} +//! lower bytecode CMPG_FLOAT + +//! +int op_cmpg_float() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_VR_ss(v1, 1, false); + compare_VR_ss_reg(v2, 1, false); + move_imm_to_reg(OpndSize_32, 0, 1, false); + move_imm_to_reg(OpndSize_32, 1, 2, false); + //default: 0xffffffff?? + move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_Z, + 1, false, 3, false); + //finalNaN: unordered + conditional_move_reg_to_reg(OpndSize_32, Condition_P, + 2, false, 3, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, + 2, false, 3, false); + set_virtual_reg(vA, OpndSize_32, 3, false); + rPC += 2; + return 0; +} +//! lower bytecode CMPL_DOUBLE + +//! +int op_cmpl_double() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_VR_sd(v1, 1, false); + compare_VR_sd_reg(v2, 1, false); + move_imm_to_reg(OpndSize_32, 0, 1, false); + move_imm_to_reg(OpndSize_32, 1, 2, false); + move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); + + //default: 0xffffffff?? + move_imm_to_reg(OpndSize_32, 0xffffffff, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_Z, + 1, false, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_P, + 3, false, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, + 2, false, 4, false); + set_virtual_reg(vA, OpndSize_32, 4, false); + rPC += 2; + return 0; +} +//! lower bytecode CMPG_DOUBLE + +//! +int op_cmpg_double() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_VR_sd(v1, 1, false); + compare_VR_sd_reg(v2, 1, false); + move_imm_to_reg(OpndSize_32, 0, 1, false); + move_imm_to_reg(OpndSize_32, 1, 2, false); + + //default: 0xffffffff?? 
+ move_imm_to_reg(OpndSize_32, + 0xffffffff, 3, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_Z, + 1, false, 3, false); + //finalNaN: unordered + conditional_move_reg_to_reg(OpndSize_32, Condition_P, + 2, false, 3, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, + 2, false, 3, false); + set_virtual_reg(vA, OpndSize_32, 3, false); + rPC += 2; + return 0; +} +#define P_GPR_1 PhysicalReg_EBX +#define P_GPR_2 PhysicalReg_ECX +#define P_GPR_3 PhysicalReg_ESI +#define P_SCRATCH_1 PhysicalReg_EDX +#define P_SCRATCH_2 PhysicalReg_EAX +#define OPTION_OLD //for simpler cfg +//! lower bytecode CMP_LONG + +//! +int op_cmp_long() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_virtual_reg(v1+1, OpndSize_32, 2, false); +#ifdef OPTION_OLD + move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); + move_imm_to_reg(OpndSize_32, 1, 4, false); + move_imm_to_reg(OpndSize_32, 0, 5, false); +#endif + compare_VR_reg(OpndSize_32, + v2+1, 2, false); +#ifndef OPTION_OLD + conditional_jump(Condition_L, ".cmp_long_less", true); + conditional_jump(Condition_G, ".cmp_long_greater", true); +#else + conditional_jump(Condition_E, ".cmp_long_equal", true); + rememberState(1); + conditional_move_reg_to_reg(OpndSize_32, Condition_L, //below vs less + 3, false, 6, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_G, //above vs greater + 4, false, 6, false); + set_virtual_reg(vA, OpndSize_32, 6, false); + rememberState(2); + unconditional_jump(".cmp_long_okay", true); + insertLabel(".cmp_long_equal", true); + goToState(1); +#endif + + get_virtual_reg(v1, OpndSize_32, 1, false); + compare_VR_reg(OpndSize_32, + v2, 1, false); +#ifdef OPTION_OLD + conditional_move_reg_to_reg(OpndSize_32, Condition_E, + 5, false, 6, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_B, //below vs less + 3, false, 6, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, //above vs greater + 4, false, 6, false); + set_virtual_reg(vA, OpndSize_32, 6, false); + transferToState(2); +#else + conditional_jump(Condition_A, ".cmp_long_greater", true); + conditional_jump(Condition_NE, ".cmp_long_less", true); + set_VR_to_imm(vA, OpndSize_32, 0); + unconditional_jump(".cmp_long_okay", true); + + insertLabel(".cmp_long_less", true); + set_VR_to_imm(vA, OpndSize_32, 0xffffffff); + unconditional_jump(".cmp_long_okay", true); + + insertLabel(".cmp_long_greater", true); + set_VR_to_imm(vA, OpndSize_32, 1); +#endif + insertLabel(".cmp_long_okay", true); + rPC += 2; + return 0; +} +#undef P_GPR_1 +#undef P_GPR_2 +#undef P_GPR_3 |
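For reference, the `div_lit_strength_reduction` helper above replaces division by a literal power of two with a shift sequence (copy, arithmetic and logical shifts to build a sign-dependent bias, add, then `sar`), because a bare arithmetic right shift rounds toward negative infinity while Dalvik/Java division rounds toward zero. Below is a minimal C++ sketch of the same transformation, assuming arithmetic right shift for signed values (true on the x86 targets this code serves); the function name is illustrative, not from the patch.

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the shift sequence div_lit_strength_reduction emits for a
// divisor of 2^k (k >= 1). A bare "x >> k" rounds toward -infinity; adding
// (2^k - 1) to negative dividends first makes the result round toward zero,
// matching Dalvik/Java integer division. Assumes >> on int32_t is an
// arithmetic shift, as on the x86 compilers this JIT targets.
static int32_t divByPowerOfTwo(int32_t x, int k) {
    // Sign-propagate, then shift logically so bias = (2^k - 1) when x < 0
    // and 0 otherwise -- the same sar/shr/add pattern the emitted code uses.
    int32_t bias = (int32_t)((uint32_t)(x >> (k - 1)) >> (32 - k));
    return (x + bias) >> k;
}

int main() {
    assert(divByPowerOfTwo(-7, 1) == -7 / 2);   // -3, not -4
    assert(divByPowerOfTwo(-7, 2) == -7 / 4);   // -1, not -2
    assert(divByPowerOfTwo(9, 3) == 9 / 8);     // 1
    return 0;
}
```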