diff options
Diffstat (limited to 'vm/compiler/codegen/x86/LowerAlu.cpp')
-rw-r--r-- | vm/compiler/codegen/x86/LowerAlu.cpp | 1962 |
1 files changed, 1962 insertions, 0 deletions
diff --git a/vm/compiler/codegen/x86/LowerAlu.cpp b/vm/compiler/codegen/x86/LowerAlu.cpp new file mode 100644 index 000000000..2231baca1 --- /dev/null +++ b/vm/compiler/codegen/x86/LowerAlu.cpp @@ -0,0 +1,1962 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/*! \file LowerAlu.cpp + \brief This file lowers ALU bytecodes. +*/ +#include "libdex/DexOpcodes.h" +#include "libdex/DexFile.h" +#include "Lower.h" +#include "NcgAot.h" +#include "enc_wrapper.h" + +///////////////////////////////////////////// +#define P_GPR_1 PhysicalReg_EBX +//! lower bytecode NEG_INT + +//! +int op_neg_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_unary_reg(OpndSize_32, neg_opc, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +//! lower bytecode NOT_INT + +//! +int op_not_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_unary_reg(OpndSize_32, not_opc, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +#undef P_GPR_1 +//! lower bytecode NEG_LONG + +//! This implementation uses XMM registers +int op_neg_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_64, 1, false); + alu_binary_reg_reg(OpndSize_64, xor_opc, 2, false, 2, false); + alu_binary_reg_reg(OpndSize_64, sub_opc, 1, false, 2, false); + set_virtual_reg(vA, OpndSize_64, 2, false); + rPC += 1; + return 0; +} +//! lower bytecode NOT_LONG + +//! This implementation uses XMM registers +int op_not_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_64, 1, false); + load_global_data_API("64bits", OpndSize_64, 2, false); + alu_binary_reg_reg(OpndSize_64, andn_opc, 2, false, 1, false); + set_virtual_reg(vA, OpndSize_64, 1, false); + rPC += 1; + return 0; +} +#define P_GPR_1 PhysicalReg_EBX +//! lower bytecode NEG_FLOAT + +//! This implementation uses GPR +int op_neg_float() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, add_opc, 0x80000000, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +#undef P_GPR_1 + +//! lower bytecode NEG_DOUBLE + +//! This implementation uses XMM registers +int op_neg_double() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_64, 1, false); + load_global_data_API("doubNeg", OpndSize_64, 2, false); + alu_binary_reg_reg(OpndSize_64, xor_opc, 1, false, 2, false); + set_virtual_reg(vA, OpndSize_64, 2, false); + rPC += 1; + return 0; +} + +//! lower bytecode INT_TO_LONG + +//! It uses native instruction cdq +int op_int_to_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true); + convert_integer(OpndSize_32, OpndSize_64); + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + set_virtual_reg(vA+1, OpndSize_32, PhysicalReg_EDX, true); + rPC += 1; + return 0; +} +//! lower bytecode INT_TO_FLOAT + +//! This implementation uses FP stack +int op_int_to_float() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_int_fp_stack_VR(OpndSize_32, vB); //fildl + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + rPC += 1; + return 0; +} +//! lower bytecode INT_TO_DOUBLE + +//! This implementation uses FP stack +int op_int_to_double() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_int_fp_stack_VR(OpndSize_32, vB); //fildl + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + rPC += 1; + return 0; +} +//! lower bytecode LONG_TO_FLOAT + +//! This implementation uses FP stack +int op_long_to_float() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_int_fp_stack_VR(OpndSize_64, vB); //fildll + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + rPC += 1; + return 0; +} +//! lower bytecode LONG_TO_DOUBLE + +//! This implementation uses FP stack +int op_long_to_double() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_int_fp_stack_VR(OpndSize_64, vB); //fildll + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + rPC += 1; + return 0; +} +//! lower bytecode FLOAT_TO_DOUBLE + +//! This implementation uses FP stack +int op_float_to_double() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_fp_stack_VR(OpndSize_32, vB); //flds + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + rPC += 1; + return 0; +} +//! lower bytecode DOUBLE_TO_FLOAT + +//! This implementation uses FP stack +int op_double_to_float() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + load_fp_stack_VR(OpndSize_64, vB); //fldl + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + rPC += 1; + return 0; +} +#define P_GPR_1 PhysicalReg_EBX +//! lower bytecode LONG_TO_INT + +//! This implementation uses GPR +int op_long_to_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +#undef P_GPR_1 + +//! common code to convert a float or double to integer + +//! It uses FP stack +int common_fp_to_int(bool isDouble, u2 vA, u2 vB) { + if(isDouble) { + load_fp_stack_VR(OpndSize_64, vB); //fldl + } + else { + load_fp_stack_VR(OpndSize_32, vB); //flds + } + + load_fp_stack_global_data_API("intMax", OpndSize_32); + load_fp_stack_global_data_API("intMin", OpndSize_32); + + //ST(0) ST(1) ST(2) --> LintMin LintMax value + compare_fp_stack(true, 2, false/*isDouble*/); //ST(2) + //ST(0) ST(1) --> LintMax value + conditional_jump(Condition_AE, ".float_to_int_negInf", true); + rememberState(1); + compare_fp_stack(true, 1, false/*isDouble*/); //ST(1) + //ST(0) --> value + rememberState(2); + conditional_jump(Condition_C, ".float_to_int_nanInf", true); + //fnstcw, orw, fldcw, xorw + load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true); + alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true); + load_fpu_cw(0, PhysicalReg_ESP, true); + alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true); + store_int_fp_stack_VR(true/*pop*/, OpndSize_32, vA); //fistpl + //fldcw + load_fpu_cw(0, PhysicalReg_ESP, true); + load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + rememberState(3); + unconditional_jump(".float_to_int_okay", true); + insertLabel(".float_to_int_nanInf", true); + conditional_jump(Condition_NP, ".float_to_int_posInf", true); + //fstps CHECK + goToState(2); + store_fp_stack_VR(true, OpndSize_32, vA); + set_VR_to_imm(vA, OpndSize_32, 0); + transferToState(3); + unconditional_jump(".float_to_int_okay", true); + insertLabel(".float_to_int_posInf", true); + //fstps CHECK + goToState(2); + store_fp_stack_VR(true, OpndSize_32, vA); + set_VR_to_imm(vA, OpndSize_32, 0x7fffffff); + transferToState(3); + unconditional_jump(".float_to_int_okay", true); + insertLabel(".float_to_int_negInf", true); + goToState(1); + //fstps CHECK + store_fp_stack_VR(true, OpndSize_32, vA); + store_fp_stack_VR(true, OpndSize_32, vA); + set_VR_to_imm(vA, OpndSize_32, 0x80000000); + transferToState(3); + insertLabel(".float_to_int_okay", true); + return 0; +} +//! lower bytecode FLOAT_TO_INT by calling common_fp_to_int + +//! +int op_float_to_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + int retval = common_fp_to_int(false, vA, vB); + rPC += 1; + return retval; +} +//! lower bytecode DOUBLE_TO_INT by calling common_fp_to_int + +//! +int op_double_to_int() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + int retval = common_fp_to_int(true, vA, vB); + rPC += 1; + return retval; +} + +//! common code to convert float or double to long + +//! It uses FP stack +int common_fp_to_long(bool isDouble, u2 vA, u2 vB) { + if(isDouble) { + load_fp_stack_VR(OpndSize_64, vB); //fldl + } + else { + load_fp_stack_VR(OpndSize_32, vB); //flds + } + + load_fp_stack_global_data_API("valuePosInfLong", OpndSize_64); + load_fp_stack_global_data_API("valueNegInfLong", OpndSize_64); + + //ST(0) ST(1) ST(2) --> LintMin LintMax value + compare_fp_stack(true, 2, false/*isDouble*/); //ST(2) + //ST(0) ST(1) --> LintMax value + conditional_jump(Condition_AE, ".float_to_long_negInf", true); + rememberState(1); + compare_fp_stack(true, 1, false/*isDouble*/); //ST(1) + rememberState(2); + //ST(0) --> value + conditional_jump(Condition_C, ".float_to_long_nanInf", true); + //fnstcw, orw, fldcw, xorw + load_effective_addr(-2, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + store_fpu_cw(false/*checkException*/, 0, PhysicalReg_ESP, true); + alu_binary_imm_mem(OpndSize_16, or_opc, 0xc00, 0, PhysicalReg_ESP, true); + load_fpu_cw(0, PhysicalReg_ESP, true); + alu_binary_imm_mem(OpndSize_16, xor_opc, 0xc00, 0, PhysicalReg_ESP, true); + store_int_fp_stack_VR(true/*pop*/, OpndSize_64, vA); //fistpll + //fldcw + load_fpu_cw(0, PhysicalReg_ESP, true); + load_effective_addr(2, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + rememberState(3); + unconditional_jump(".float_to_long_okay", true); + insertLabel(".float_to_long_nanInf", true); + conditional_jump(Condition_NP, ".float_to_long_posInf", true); + //fstpl?? + goToState(2); + + load_global_data_API("valueNanLong", OpndSize_64, 1, false); + + set_virtual_reg(vA, OpndSize_64, 1, false); + transferToState(3); + unconditional_jump(".float_to_long_okay", true); + insertLabel(".float_to_long_posInf", true); + //fstpl + goToState(2); + + load_global_data_API("valuePosInfLong", OpndSize_64, 2, false); + set_virtual_reg(vA, OpndSize_64, 2, false); + transferToState(3); + unconditional_jump(".float_to_long_okay", true); + insertLabel(".float_to_long_negInf", true); + //fstpl + //fstpl + goToState(1); + + load_global_data_API("valueNegInfLong", OpndSize_64, 3, false); + set_virtual_reg(vA, OpndSize_64, 3, false); + transferToState(3); + insertLabel(".float_to_long_okay", true); + return 0; +} +//! lower bytecode FLOAT_TO_LONG by calling common_fp_to_long + +//! +int op_float_to_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + int retval = common_fp_to_long(false, vA, vB); + rPC += 1; + return retval; +} +//! lower bytecode DOUBLE_TO_LONG by calling common_fp_to_long + +//! +int op_double_to_long() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + int retval = common_fp_to_long(true, vA, vB); + rPC += 1; + return retval; +} +#define P_GPR_1 PhysicalReg_EBX +//! lower bytecode INT_TO_BYTE + +//! It uses GPR +int op_int_to_byte() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, sal_opc, 24, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, 24, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +//! lower bytecode INT_TO_CHAR + +//! It uses GPR +int op_int_to_char() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false); + alu_binary_imm_reg(OpndSize_32, shr_opc, 16, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +//! lower bytecode INT_TO_SHORT + +//! It uses GPR +int op_int_to_short() { + u2 vA = INST_A(inst); //destination + u2 vB = INST_B(inst); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, sal_opc, 16, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, 16, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + rPC += 1; + return 0; +} +//! common code to handle integer ALU ops + +//! It uses GPR +int common_alu_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem + get_virtual_reg(v1, OpndSize_32, 1, false); + //in encoder, reg is first operand, which is the destination + //gpr_1 op v2(rFP) --> gpr_1 + //shift only works with reg cl, v2 should be in %ecx + alu_binary_VR_reg(OpndSize_32, opc, v2, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 0; +} +#undef P_GPR_1 +#define P_GPR_1 PhysicalReg_EBX +//! common code to handle integer shift ops + +//! It uses GPR +int common_shift_int(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v2, OpndSize_32, PhysicalReg_ECX, true); + get_virtual_reg(v1, OpndSize_32, 1, false); + //in encoder, reg2 is first operand, which is the destination + //gpr_1 op v2(rFP) --> gpr_1 + //shift only works with reg cl, v2 should be in %ecx + alu_binary_reg_reg(OpndSize_32, opc, PhysicalReg_ECX, true, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 0; +} +#undef p_GPR_1 +//! lower bytecode ADD_INT by calling common_alu_int + +//! +int op_add_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(add_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SUB_INT by calling common_alu_int + +//! +int op_sub_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(sub_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode MUL_INT by calling common_alu_int + +//! +int op_mul_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(imul_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode AND_INT by calling common_alu_int + +//! +int op_and_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(and_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode OR_INT by calling common_alu_int + +//! +int op_or_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(or_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode XOR_INT by calling common_alu_int + +//! +int op_xor_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_alu_int(xor_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SHL_INT by calling common_shift_int + +//! +int op_shl_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_shift_int(shl_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SHR_INT by calling common_shift_int + +//! +int op_shr_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_shift_int(sar_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode USHR_INT by calling common_shift_int + +//! +int op_ushr_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_shift_int(shr_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode ADD_INT_2ADDR by calling common_alu_int + +//! +int op_add_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(add_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SUB_INT_2ADDR by calling common_alu_int + +//! +int op_sub_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(sub_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode MUL_INT_2ADDR by calling common_alu_int + +//! +int op_mul_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(imul_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode AND_INT_2ADDR by calling common_alu_int + +//! +int op_and_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(and_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode OR_INT_2ADDR by calling common_alu_int + +//! +int op_or_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(or_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode XOR_INT_2ADDR by calling common_alu_int + +//! +int op_xor_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_alu_int(xor_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SHL_INT_2ADDR by calling common_shift_int + +//! +int op_shl_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_shift_int(shl_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SHR_INT_2ADDR by calling common_shift_int + +//! +int op_shr_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_shift_int(sar_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode USHR_INT_2ADDR by calling common_shift_int + +//! +int op_ushr_int_2addr() { + u2 vA, v1, v2; + vA = INST_A(inst); + v1 = vA; + v2 = INST_B(inst); + int retval = common_shift_int(shr_opc, vA, v1, v2); + rPC += 1; + return retval; +} +#define P_GPR_1 PhysicalReg_EBX +//!common code to handle integer DIV & REM, it used GPR + +//!The special case: when op0 == minint && op1 == -1, return 0 for isRem, return 0x80000000 for isDiv +//!There are two merge points in the control flow for this bytecode +//!make sure the reg. alloc. state is the same at merge points by calling transferToState +int common_div_rem_int(bool isRem, u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v1, OpndSize_32, PhysicalReg_EAX, true); + get_virtual_reg(v2, OpndSize_32, 2, false); + compare_imm_reg(OpndSize_32, 0, 2, false); + handlePotentialException( + Condition_E, Condition_NE, + 1, "common_errDivideByZero"); + /////////////////// handle special cases + //conditional move 0 to $edx for rem for the two special cases + //conditional move 0x80000000 to $eax for div + //handle -1 special case divide error + compare_imm_reg(OpndSize_32, -1, 2, false); + conditional_jump(Condition_NE, ".common_div_rem_int_normal", true); + //handle min int special case divide error + rememberState(1); + compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true); + transferToState(1); + conditional_jump(Condition_E, ".common_div_rem_int_special", true); + + insertLabel(".common_div_rem_int_normal", true); //merge point + convert_integer(OpndSize_32, OpndSize_64); //cdq + //idiv: dividend in edx:eax; quotient in eax; remainder in edx + alu_unary_reg(OpndSize_32, idiv_opc, 2, false); + if(isRem) + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true); + else //divide: quotient in %eax + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + rememberState(2); + unconditional_jump(".common_div_rem_int_okay", true); + + insertLabel(".common_div_rem_int_special", true); + goToState(1); + if(isRem) + set_VR_to_imm(vA, OpndSize_32, 0); + else + set_VR_to_imm(vA, OpndSize_32, 0x80000000); + transferToState(2); + insertLabel(".common_div_rem_int_okay", true); //merge point 2 + return 0; +} +#undef P_GPR_1 +//! lower bytecode DIV_INT by calling common_div_rem_int + +//! +int op_div_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_div_rem_int(false, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode REM_INT by calling common_div_rem_int + +//! +int op_rem_int() { + u2 vA, v1, v2; + vA = INST_AA(inst); + v1 = *((u1*)rPC + 2); + v2 = *((u1*)rPC + 3); + int retval = common_div_rem_int(true, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode DIV_INT_2ADDR by calling common_div_rem_int + +//! +int op_div_int_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_div_rem_int(false, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode REM_INT_2ADDR by calling common_div_rem_int + +//! +int op_rem_int_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_div_rem_int(true, vA, v1, v2); + rPC += 1; + return retval; +} + +#define P_GPR_1 PhysicalReg_EBX +//! common code to handle integer ALU ops with literal + +//! It uses GPR +int common_alu_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) { //except div and rem + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_imm_reg(OpndSize_32, opc, imm, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 0; +} +//! calls common_alu_int_lit +int common_shift_int_lit(ALU_Opcode opc, u2 vA, u2 vB, s2 imm) { + return common_alu_int_lit(opc, vA, vB, imm); +} +#undef p_GPR_1 +//! lower bytecode ADD_INT_LIT16 by calling common_alu_int_lit + +//! +int op_add_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(add_opc, vA, vB, tmp); + rPC += 2; + return retval; +} + +int alu_rsub_int(ALU_Opcode opc, u2 vA, s2 imm, u2 vB) { + move_imm_to_reg(OpndSize_32, imm, 2, false); + get_virtual_reg(vB, OpndSize_32, 1, false); + alu_binary_reg_reg(OpndSize_32, opc, 1, false, 2, false); + set_virtual_reg(vA, OpndSize_32, 2, false); + return 0; +} + + +//! lower bytecode RSUB_INT by calling common_alu_int_lit + +//! +int op_rsub_int() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = alu_rsub_int(sub_opc, vA, tmp, vB); + rPC += 2; + return retval; +} +//! lower bytecode MUL_INT_LIT16 by calling common_alu_int_lit + +//! +int op_mul_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(imul_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode AND_INT_LIT16 by calling common_alu_int_lit + +//! +int op_and_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(and_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode OR_INT_LIT16 by calling common_alu_int_lit + +//! +int op_or_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(or_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode XOR_INT_LIT16 by calling common_alu_int_lit + +//! +int op_xor_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_alu_int_lit(xor_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode SHL_INT_LIT16 by calling common_shift_int_lit + +//! +int op_shl_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_shift_int_lit(shl_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode SHR_INT_LIT16 by calling common_shift_int_lit + +//! +int op_shr_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_shift_int_lit(sar_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode USHR_INT_LIT16 by calling common_shift_int_lit + +//! +int op_ushr_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_shift_int_lit(shr_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode ADD_INT_LIT8 by calling common_alu_int_lit + +//! +int op_add_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(add_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode RSUB_INT_LIT8 by calling common_alu_int_lit + +//! +int op_rsub_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = alu_rsub_int(sub_opc, vA, tmp, vB); + rPC += 2; + return retval; +} +//! lower bytecode MUL_INT_LIT8 by calling common_alu_int_lit + +//! +int op_mul_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(imul_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode AND_INT_LIT8 by calling common_alu_int_lit + +//! +int op_and_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(and_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode OR_INT_LIT8 by calling common_alu_int_lit + +//! +int op_or_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(or_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode XOR_INT_LIT8 by calling common_alu_int_lit + +//! +int op_xor_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_alu_int_lit(xor_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode SHL_INT_LIT8 by calling common_shift_int_lit + +//! +int op_shl_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_shift_int_lit(shl_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode SHR_INT_LIT8 by calling common_shift_int_lit + +//! +int op_shr_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_shift_int_lit(sar_opc, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode USHR_INT_LIT8 by calling common_shift_int_lit + +//! +int op_ushr_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_shift_int_lit(shr_opc, vA, vB, tmp); + rPC += 2; + return retval; +} + +int isPowerOfTwo(int imm) { + int i; + for(i = 1; i < 17; i++) { + if(imm == (1 << i)) return i; + } + return -1; +} + +#define P_GPR_1 PhysicalReg_EBX +int div_lit_strength_reduction(u2 vA, u2 vB, s2 imm) { + if(gDvm.executionMode == kExecutionModeNcgO1) { + //strength reduction for div by 2,4,8,... + int power = isPowerOfTwo(imm); + if(power < 1) return 0; + //tmp2 is not updated, so it can share with vB + get_virtual_reg(vB, OpndSize_32, 2, false); + //if imm is 2, power will be 1 + if(power == 1) { + /* mov tmp1, tmp2 + shrl $31, tmp1 + addl tmp2, tmp1 + sarl $1, tmp1 */ + move_reg_to_reg(OpndSize_32, 2, false, 1, false); + alu_binary_imm_reg(OpndSize_32, shr_opc, 31, 1, false); + alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, 1, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 1; + } + //power > 1 + /* mov tmp1, tmp2 + sarl $power-1, tmp1 + shrl 32-$power, tmp1 + addl tmp2, tmp1 + sarl $power, tmp1 */ + move_reg_to_reg(OpndSize_32, 2, false, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, power-1, 1, false); + alu_binary_imm_reg(OpndSize_32, shr_opc, 32-power, 1, false); + alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false); + alu_binary_imm_reg(OpndSize_32, sar_opc, power, 1, false); + set_virtual_reg(vA, OpndSize_32, 1, false); + return 1; + } + return 0; +} + +////////// throws exception!!! +//! common code to handle integer DIV & REM with literal + +//! It uses GPR +int common_div_rem_int_lit(bool isRem, u2 vA, u2 vB, s2 imm) { + if(!isRem) { + int retCode = div_lit_strength_reduction(vA, vB, imm); + if(retCode > 0) return 0; + } + if(imm == 0) { + export_pc(); //use %edx +#ifdef DEBUG_EXCEPTION + LOGI("EXTRA code to handle exception"); +#endif + constVREndOfBB(); + beforeCall("exception"); //dump GG, GL VRs + unconditional_jump_global_API( + "common_errDivideByZero", false); + + return 0; + } + get_virtual_reg(vB, OpndSize_32, PhysicalReg_EAX, true); + //check against -1 for DIV_INT?? + if(imm == -1) { + compare_imm_reg(OpndSize_32, 0x80000000, PhysicalReg_EAX, true); + conditional_jump(Condition_E, ".div_rem_int_lit_special", true); + rememberState(1); + } + move_imm_to_reg(OpndSize_32, imm, 2, false); + convert_integer(OpndSize_32, OpndSize_64); //cdq + //idiv: dividend in edx:eax; quotient in eax; remainder in edx + alu_unary_reg(OpndSize_32, idiv_opc, 2, false); + if(isRem) + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EDX, true); + else + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + + if(imm == -1) { + unconditional_jump(".div_rem_int_lit_okay", true); + rememberState(2); + insertLabel(".div_rem_int_lit_special", true); + goToState(1); + if(isRem) + set_VR_to_imm(vA, OpndSize_32, 0); + else + set_VR_to_imm(vA, OpndSize_32, 0x80000000); + transferToState(2); + } + + insertLabel(".div_rem_int_lit_okay", true); //merge point 2 + return 0; +} +#undef P_GPR_1 +//! lower bytecode DIV_INT_LIT16 by calling common_div_rem_int_lit + +//! +int op_div_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_div_rem_int_lit(false, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode REM_INT_LIT16 by calling common_div_rem_int_lit + +//! +int op_rem_int_lit16() { + u2 vA = INST_A(inst); + u2 vB = INST_B(inst); + s4 tmp = (s2)FETCH(1); + int retval = common_div_rem_int_lit(true, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode DIV_INT_LIT8 by calling common_div_rem_int_lit + +//! +int op_div_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_div_rem_int_lit(false, vA, vB, tmp); + rPC += 2; + return retval; +} +//! lower bytecode REM_INT_LIT8 by calling common_div_rem_int_lit + +//! +int op_rem_int_lit8() { + u2 vA = INST_AA(inst); + u2 vB = (u2)FETCH(1) & 0xff; + s2 tmp = (s2)FETCH(1) >> 8; + int retval = common_div_rem_int_lit(true, vA, vB, tmp); + rPC += 2; + return retval; +} +//! common code to hanle long ALU ops + +//! It uses XMM +int common_alu_long(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) { //except div and rem + get_virtual_reg(v1, OpndSize_64, 1, false); + get_virtual_reg(v2, OpndSize_64, 2, false); + alu_binary_reg_reg(OpndSize_64, opc, 2, false, 1, false); + set_virtual_reg(vA, OpndSize_64, 1, false); + return 0; +} +//! lower bytecode ADD_LONG by calling common_alu_long + +//! +int op_add_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(add_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SUB_LONG by calling common_alu_long + +//! +int op_sub_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(sub_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode AND_LONG by calling common_alu_long + +//! +int op_and_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(and_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode OR_LONG by calling common_alu_long + +//! +int op_or_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(or_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode XOR_LONG by calling common_alu_long + +//! +int op_xor_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_long(xor_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode ADD_LONG_2ADDR by calling common_alu_long + +//! +int op_add_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(add_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SUB_LONG_2ADDR by calling common_alu_long + +//! +int op_sub_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(sub_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode AND_LONG_2ADDR by calling common_alu_long + +//! +int op_and_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(and_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode OR_LONG_2ADDR by calling common_alu_long + +//! +int op_or_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(or_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode XOR_LONG_2ADDR by calling common_alu_long + +//! +int op_xor_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_long(xor_opc, vA, v1, v2); + rPC += 1; + return retval; +} + +//signed vs unsigned imul and mul? +#define P_GPR_1 PhysicalReg_EBX +#define P_GPR_2 PhysicalReg_ECX +#define P_GPR_3 PhysicalReg_ESI +//! common code to handle multiplication of long + +//! It uses GPR +int common_mul_long(u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v2, OpndSize_32, 1, false); + move_reg_to_reg(OpndSize_32, 1, false, PhysicalReg_EAX, true); + //imul: 2L * 1H update temporary 1 + alu_binary_VR_reg(OpndSize_32, imul_opc, (v1+1), 1, false); + get_virtual_reg(v1, OpndSize_32, 3, false); + move_reg_to_reg(OpndSize_32, 3, false, 2, false); + //imul: 1L * 2H + alu_binary_VR_reg(OpndSize_32, imul_opc, (v2+1), 2, false); + alu_binary_reg_reg(OpndSize_32, add_opc, 2, false, 1, false); + alu_unary_reg(OpndSize_32, mul_opc, 3, false); + alu_binary_reg_reg(OpndSize_32, add_opc, PhysicalReg_EDX, true, 1, false); + set_virtual_reg(vA+1, OpndSize_32, 1, false); + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + return 0; +} +#undef P_GPR_1 +#undef P_GPR_2 +#undef P_GPR_3 +//! lower bytecode MUL_LONG by calling common_mul_long + +//! +int op_mul_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_mul_long(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode MUL_LONG_2ADDR by calling common_mul_long + +//! +int op_mul_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_mul_long(vA, v1, v2); + rPC += 1; + return retval; +} + +#define P_GPR_1 PhysicalReg_EBX +#define P_GPR_2 PhysicalReg_ECX +//! common code to handle DIV & REM of long + +//! It uses GPR & XMM; and calls call_moddi3 & call_divdi3 +int common_div_rem_long(bool isRem, u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v2, OpndSize_32, 1, false); + get_virtual_reg(v2+1, OpndSize_32, 2, false); + //save to native stack before changing register P_GPR_1 + load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_32, 1, false, 8, PhysicalReg_ESP, true); + alu_binary_reg_reg(OpndSize_32, or_opc, 2, false, 1, false); + + handlePotentialException( + Condition_E, Condition_NE, + 1, "common_errDivideByZero"); + move_reg_to_mem(OpndSize_32, 2, false, 12, PhysicalReg_ESP, true); + get_virtual_reg(v1, OpndSize_64, 1, false); + move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true); + scratchRegs[0] = PhysicalReg_SCRATCH_1; + nextVersionOfHardReg(PhysicalReg_EDX, 2); //next version has 2 refs + if(isRem) + call_moddi3(); + else + call_divdi3(); + load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + set_virtual_reg(vA+1, OpndSize_32,PhysicalReg_EDX, true); + set_virtual_reg(vA, OpndSize_32, PhysicalReg_EAX, true); + return 0; +} +#undef P_GPR_1 +#undef P_GPR_2 +//! lower bytecode DIV_LONG by calling common_div_rem_long + +//! +int op_div_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_div_rem_long(false, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode REM_LONG by calling common_div_rem_long + +//! +int op_rem_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_div_rem_long(true, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode DIV_LONG_2ADDR by calling common_div_rem_long + +//! +int op_div_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_div_rem_long(false, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode REM_LONG_2ADDR by calling common_div_rem_long + +//! +int op_rem_long_2addr() { //call __moddi3 instead of __divdi3 + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_div_rem_long(true, vA, v1, v2); + rPC += 1; + return retval; +} + +//! common code to handle SHL long + +//! It uses XMM +int common_shl_long(u2 vA, u2 v1, u2 v2) { + get_VR_ss(v2, 2, false); + + load_global_data_API("shiftMask", OpndSize_64, 3, false); + + get_virtual_reg(v1, OpndSize_64, 1, false); + alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); + alu_binary_reg_reg(OpndSize_64, sll_opc, 2, false, 1, false); + set_virtual_reg(vA, OpndSize_64, 1, false); + return 0; +} + +//! common code to handle SHR long + +//! It uses XMM +int common_shr_long(u2 vA, u2 v1, u2 v2) { + get_VR_ss(v2, 2, false); + + load_global_data_API("shiftMask", OpndSize_64, 3, false); + + get_virtual_reg(v1, OpndSize_64, 1, false); + alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); + alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false); + compare_imm_VR(OpndSize_32, 0, (v1+1)); + conditional_jump(Condition_GE, ".common_shr_long_special", true); + rememberState(1); + + load_global_data_API("value64", OpndSize_64, 4, false); + + alu_binary_reg_reg(OpndSize_64, sub_opc, 2, false, 4, false); + + load_global_data_API("64bits", OpndSize_64, 5, false); + + alu_binary_reg_reg(OpndSize_64, sll_opc, 4, false, 5, false); + alu_binary_reg_reg(OpndSize_64, or_opc, 5, false, 1, false); + rememberState(2); + //check whether the target is next instruction TODO + unconditional_jump(".common_shr_long_done", true); + + insertLabel(".common_shr_long_special", true); + goToState(1); + transferToState(2); + insertLabel(".common_shr_long_done", true); + set_virtual_reg(vA, OpndSize_64, 1, false); + return 0; +} + +//! common code to handle USHR long + +//! It uses XMM +int common_ushr_long(u2 vA, u2 v1, u2 v2) { + get_VR_sd(v1, 1, false); + get_VR_ss(v2, 2, false); + + load_sd_global_data_API("shiftMask", 3, false); + + alu_binary_reg_reg(OpndSize_64, and_opc, 3, false, 2, false); + alu_binary_reg_reg(OpndSize_64, srl_opc, 2, false, 1, false); + set_VR_sd(vA, 1, false); + return 0; +} +//! lower bytecode SHL_LONG by calling common_shl_long + +//! +int op_shl_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_shl_long(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SHL_LONG_2ADDR by calling common_shl_long + +//! +int op_shl_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_shl_long(vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SHR_LONG by calling common_shr_long + +//! +int op_shr_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_shr_long(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SHR_LONG_2ADDR by calling common_shr_long + +//! +int op_shr_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_shr_long(vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode USHR_LONG by calling common_ushr_long + +//! +int op_ushr_long() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_ushr_long(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode USHR_LONG_2ADDR by calling common_ushr_long + +//! +int op_ushr_long_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_ushr_long(vA, v1, v2); + rPC += 1; + return retval; +} +#define USE_MEM_OPERAND +/////////////////////////////////////////// +//! common code to handle ALU of floats + +//! It uses XMM +int common_alu_float(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul + get_VR_ss(v1, 1, false); +#ifdef USE_MEM_OPERAND + alu_sd_binary_VR_reg(opc, v2, 1, false, false/*isSD*/); +#else + get_VR_ss(v2, 2, false); + alu_ss_binary_reg_reg(opc, 2, false, 1, false); +#endif + set_VR_ss(vA, 1, false); + return 0; +} +//! lower bytecode ADD_FLOAT by calling common_alu_float + +//! +int op_add_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_float(add_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SUB_FLOAT by calling common_alu_float + +//! +int op_sub_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_float(sub_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode MUL_FLOAT by calling common_alu_float + +//! +int op_mul_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_float(mul_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode ADD_FLOAT_2ADDR by calling common_alu_float + +//! +int op_add_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_float(add_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SUB_FLOAT_2ADDR by calling common_alu_float + +//! +int op_sub_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_float(sub_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode MUL_FLOAT_2ADDR by calling common_alu_float + +//! +int op_mul_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_float(mul_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! common code to handle DIV of float + +//! It uses FP stack +int common_div_float(u2 vA, u2 v1, u2 v2) { + load_fp_stack_VR(OpndSize_32, v1); //flds + fpu_VR(div_opc, OpndSize_32, v2); + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + return 0; +} +//! lower bytecode DIV_FLOAT by calling common_div_float + +//! +int op_div_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_float(div_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode DIV_FLOAT_2ADDR by calling common_div_float + +//! +int op_div_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_float(div_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! common code to handle DIV of double + +//! It uses XMM +int common_alu_double(ALU_Opcode opc, u2 vA, u2 v1, u2 v2) {//add, sub, mul + get_VR_sd(v1, 1, false); +#ifdef USE_MEM_OPERAND + alu_sd_binary_VR_reg(opc, v2, 1, false, true /*isSD*/); +#else + get_VR_sd(v2, 2, false); + alu_sd_binary_reg_reg(opc, 2, false, 1, false); +#endif + set_VR_sd(vA, 1, false); + return 0; +} +//! lower bytecode ADD_DOUBLE by calling common_alu_double + +//! +int op_add_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_double(add_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode SUB_DOUBLE by calling common_alu_double + +//! +int op_sub_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_double(sub_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode MUL_DOUBLE by calling common_alu_double + +//! +int op_mul_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_double(mul_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode ADD_DOUBLE_2ADDR by calling common_alu_double + +//! +int op_add_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_double(add_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode SUB_DOUBLE_2ADDR by calling common_alu_double + +//! +int op_sub_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_double(sub_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode MUL_DOUBLE_2ADDR by calling common_alu_double + +//! +int op_mul_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_double(mul_opc, vA, v1, v2); + rPC += 1; + return retval; +} +//! common code to handle DIV of double + +//! It uses FP stack +int common_div_double(u2 vA, u2 v1, u2 v2) { + load_fp_stack_VR(OpndSize_64, v1); //fldl + fpu_VR(div_opc, OpndSize_64, v2); //fdivl + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + return 0; +} +//! lower bytecode DIV_DOUBLE by calling common_div_double + +//! +int op_div_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_alu_double(div_opc, vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode DIV_DOUBLE_2ADDR by calling common_div_double + +//! +int op_div_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_alu_double(div_opc, vA, v1, v2); + rPC += 1; + return retval; +} +#define P_GPR_1 PhysicalReg_EBX +#define P_GPR_2 PhysicalReg_ECX +//! common code to handle REM of float + +//! It uses GPR & calls call_fmodf +int common_rem_float(u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v1, OpndSize_32, 1, false); + get_virtual_reg(v2, OpndSize_32, 2, false); + load_effective_addr(-8, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_32, 1, false, 0, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_32, 2, false, 4, PhysicalReg_ESP, true); + scratchRegs[0] = PhysicalReg_SCRATCH_1; + call_fmodf(); //(float x, float y) return float + load_effective_addr(8, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + store_fp_stack_VR(true, OpndSize_32, vA); //fstps + return 0; +} +#undef P_GPR_1 +#undef P_GPR_2 +//! lower bytecode REM_FLOAT by calling common_rem_float + +//! +int op_rem_float() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_rem_float(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode REM_FLOAT_2ADDR by calling common_rem_float + +//! +int op_rem_float_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_rem_float(vA, v1, v2); + rPC += 1; + return retval; +} +//! common code to handle REM of double + +//! It uses XMM & calls call_fmod +int common_rem_double(u2 vA, u2 v1, u2 v2) { + get_virtual_reg(v1, OpndSize_64, 1, false); + get_virtual_reg(v2, OpndSize_64, 2, false); + load_effective_addr(-16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_64, 1, false, 0, PhysicalReg_ESP, true); + move_reg_to_mem(OpndSize_64, 2, false, 8, PhysicalReg_ESP, true); + scratchRegs[0] = PhysicalReg_SCRATCH_1; + call_fmod(); //(long double x, long double y) return double + load_effective_addr(16, PhysicalReg_ESP, true, PhysicalReg_ESP, true); + store_fp_stack_VR(true, OpndSize_64, vA); //fstpl + return 0; +} +//! lower bytecode REM_DOUBLE by calling common_rem_double + +//! +int op_rem_double() { + u2 vA = INST_AA(inst); + u2 v1 = *((u1*)rPC + 2); + u2 v2 = *((u1*)rPC + 3); + int retval = common_rem_double(vA, v1, v2); + rPC += 2; + return retval; +} +//! lower bytecode REM_DOUBLE_2ADDR by calling common_rem_double + +//! +int op_rem_double_2addr() { + u2 vA = INST_A(inst); + u2 v1 = vA; + u2 v2 = INST_B(inst); + int retval = common_rem_double(vA, v1, v2); + rPC += 1; + return retval; +} +//! lower bytecode CMPL_FLOAT + +//! +int op_cmpl_float() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_VR_ss(v1, 1, false); //xmm + move_imm_to_reg(OpndSize_32, 0, 1, false); + move_imm_to_reg(OpndSize_32, 1, 2, false); + move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); + compare_VR_ss_reg(v2, 1, false); + //default: 0xffffffff?? + move_imm_to_reg(OpndSize_32, + 0xffffffff, 4, false); + //ORDER of cmov matters !!! (Z,P,A) + //finalNaN: unordered 0xffffffff + conditional_move_reg_to_reg(OpndSize_32, Condition_Z, + 1, false, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_P, + 3, false, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, + 2, false, 4, false); + set_virtual_reg(vA, OpndSize_32, 4, false); + rPC += 2; + return 0; +} +//! lower bytecode CMPG_FLOAT + +//! +int op_cmpg_float() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_VR_ss(v1, 1, false); + compare_VR_ss_reg(v2, 1, false); + move_imm_to_reg(OpndSize_32, 0, 1, false); + move_imm_to_reg(OpndSize_32, 1, 2, false); + //default: 0xffffffff?? + move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_Z, + 1, false, 3, false); + //finalNaN: unordered + conditional_move_reg_to_reg(OpndSize_32, Condition_P, + 2, false, 3, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, + 2, false, 3, false); + set_virtual_reg(vA, OpndSize_32, 3, false); + rPC += 2; + return 0; +} +//! lower bytecode CMPL_DOUBLE + +//! +int op_cmpl_double() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_VR_sd(v1, 1, false); + compare_VR_sd_reg(v2, 1, false); + move_imm_to_reg(OpndSize_32, 0, 1, false); + move_imm_to_reg(OpndSize_32, 1, 2, false); + move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); + + //default: 0xffffffff?? + move_imm_to_reg(OpndSize_32, 0xffffffff, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_Z, + 1, false, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_P, + 3, false, 4, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, + 2, false, 4, false); + set_virtual_reg(vA, OpndSize_32, 4, false); + rPC += 2; + return 0; +} +//! lower bytecode CMPG_DOUBLE + +//! +int op_cmpg_double() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_VR_sd(v1, 1, false); + compare_VR_sd_reg(v2, 1, false); + move_imm_to_reg(OpndSize_32, 0, 1, false); + move_imm_to_reg(OpndSize_32, 1, 2, false); + + //default: 0xffffffff?? + move_imm_to_reg(OpndSize_32, + 0xffffffff, 3, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_Z, + 1, false, 3, false); + //finalNaN: unordered + conditional_move_reg_to_reg(OpndSize_32, Condition_P, + 2, false, 3, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, + 2, false, 3, false); + set_virtual_reg(vA, OpndSize_32, 3, false); + rPC += 2; + return 0; +} +#define P_GPR_1 PhysicalReg_EBX +#define P_GPR_2 PhysicalReg_ECX +#define P_GPR_3 PhysicalReg_ESI +#define P_SCRATCH_1 PhysicalReg_EDX +#define P_SCRATCH_2 PhysicalReg_EAX +#define OPTION_OLD //for simpler cfg +//! lower bytecode CMP_LONG + +//! +int op_cmp_long() { + u2 vA = INST_AA(inst); + u4 v1 = FETCH(1) & 0xff; + u4 v2 = FETCH(1) >> 8; + get_virtual_reg(v1+1, OpndSize_32, 2, false); +#ifdef OPTION_OLD + move_imm_to_reg(OpndSize_32, 0xffffffff, 3, false); + move_imm_to_reg(OpndSize_32, 1, 4, false); + move_imm_to_reg(OpndSize_32, 0, 5, false); +#endif + compare_VR_reg(OpndSize_32, + v2+1, 2, false); +#ifndef OPTION_OLD + conditional_jump(Condition_L, ".cmp_long_less", true); + conditional_jump(Condition_G, ".cmp_long_greater", true); +#else + conditional_jump(Condition_E, ".cmp_long_equal", true); + rememberState(1); + conditional_move_reg_to_reg(OpndSize_32, Condition_L, //below vs less + 3, false, 6, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_G, //above vs greater + 4, false, 6, false); + set_virtual_reg(vA, OpndSize_32, 6, false); + rememberState(2); + unconditional_jump(".cmp_long_okay", true); + insertLabel(".cmp_long_equal", true); + goToState(1); +#endif + + get_virtual_reg(v1, OpndSize_32, 1, false); + compare_VR_reg(OpndSize_32, + v2, 1, false); +#ifdef OPTION_OLD + conditional_move_reg_to_reg(OpndSize_32, Condition_E, + 5, false, 6, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_B, //below vs less + 3, false, 6, false); + conditional_move_reg_to_reg(OpndSize_32, Condition_A, //above vs greater + 4, false, 6, false); + set_virtual_reg(vA, OpndSize_32, 6, false); + transferToState(2); +#else + conditional_jump(Condition_A, ".cmp_long_greater", true); + conditional_jump(Condition_NE, ".cmp_long_less", true); + set_VR_to_imm(vA, OpndSize_32, 0); + unconditional_jump(".cmp_long_okay", true); + + insertLabel(".cmp_long_less", true); + set_VR_to_imm(vA, OpndSize_32, 0xffffffff); + unconditional_jump(".cmp_long_okay", true); + + insertLabel(".cmp_long_greater", true); + set_VR_to_imm(vA, OpndSize_32, 1); +#endif + insertLabel(".cmp_long_okay", true); + rPC += 2; + return 0; +} +#undef P_GPR_1 +#undef P_GPR_2 +#undef P_GPR_3 |