Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/dex/compiler_enums.h | 6
-rw-r--r-- | compiler/dex/frontend.cc | 4
-rw-r--r-- | compiler/dex/quick/arm/target_arm.cc | 1
-rw-r--r-- | compiler/dex/quick/arm64/target_arm64.cc | 1
-rw-r--r-- | compiler/dex/quick/gen_common.cc | 6
-rw-r--r-- | compiler/dex/quick/gen_invoke.cc | 37
-rw-r--r-- | compiler/dex/quick/gen_loadstore.cc | 36
-rw-r--r-- | compiler/dex/quick/mips/target_mips.cc | 1
-rw-r--r-- | compiler/dex/quick/mir_to_lir.cc | 64
-rw-r--r-- | compiler/dex/quick/mir_to_lir.h | 4
-rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 28
-rw-r--r-- | compiler/dex/quick/x86/call_x86.cc | 39
-rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 63
-rw-r--r-- | compiler/dex/quick/x86/fp_x86.cc | 65
-rw-r--r-- | compiler/dex/quick/x86/int_x86.cc | 656
-rw-r--r-- | compiler/dex/quick/x86/target_x86.cc | 587
-rw-r--r-- | compiler/dex/quick/x86/utility_x86.cc | 171
-rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 26
18 files changed, 1478 insertions, 317 deletions
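
Orientation note (not part of the patch): the per-file changes below extend the Quick backend's x86-64 support with extra argument-register slots (kArg4/kArg5, kFArg4-kFArg7), REX.W-prefixed 64-bit encodings in assemble_x86.cc, and 64-bit paths through the long and floating-point code generators. The InToRegStorageX86_64Mapper declared in codegen_x86.h hands out argument registers in the SysV x86-64 style, where up to six integer/pointer arguments and up to eight floating-point arguments travel in registers before spilling to the stack. The following is a minimal stand-alone C++ sketch of that idea, not the ART implementation; the class name, register names, and the simplified GetNextReg signature are illustrative assumptions.

#include <string>

// Illustrative sketch only (not ART code): SysV x86-64 passes the first six
// integer/pointer arguments in RDI, RSI, RDX, RCX, R8, R9 and the first eight
// float/double arguments in XMM0..XMM7; later arguments go on the stack.
class ArgRegMapperSketch {
 public:
  // Returns the register for the next argument, or "" to mean "stack slot".
  std::string GetNextReg(bool is_double_or_float) {
    static const char* const kCoreRegs[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
    static const char* const kFpRegs[] = {"xmm0", "xmm1", "xmm2", "xmm3",
                                          "xmm4", "xmm5", "xmm6", "xmm7"};
    if (is_double_or_float) {
      return cur_fp_reg_ < 8 ? kFpRegs[cur_fp_reg_++] : "";
    }
    return cur_core_reg_ < 6 ? kCoreRegs[cur_core_reg_++] : "";
  }

 private:
  int cur_core_reg_ = 0;  // Next unused core argument register.
  int cur_fp_reg_ = 0;    // Next unused XMM argument register.
};

Because a wide (64-bit) value occupies a single register on x86-64, the patch also replaces the 32-bit register pairs used on x86 with Solo64/FloatSolo64 storage in gen_invoke.cc.
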
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index eb48cc3783..f0b47878e6 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -48,10 +48,16 @@ enum SpecialTargetRegister { kArg1, kArg2, kArg3, + kArg4, + kArg5, kFArg0, kFArg1, kFArg2, kFArg3, + kFArg4, + kFArg5, + kFArg6, + kFArg7, kRet0, kRet1, kInvokeTgt, diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index 5b9c763af6..547c0f6b30 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -707,7 +707,7 @@ int x86_64_support_list[] = { // which has problems with long, float, double constexpr char arm64_supported_types[] = "ZBSCILVJFD"; // (x84_64) We still have troubles with compiling longs/doubles/floats -constexpr char x86_64_supported_types[] = "ZBSCILV"; +constexpr char x86_64_supported_types[] = "ZBSCILVJFD"; // TODO: Remove this when we are able to compile everything. static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) { @@ -718,7 +718,7 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) // 1 is for the return type. Currently, we only accept 2 parameters at the most. // (x86_64): For now we have the same limitation. But we might want to split this // check in future into two separate cases for arm64 and x86_64. - if (shorty_size > (1 + 2)) { + if ((shorty_size > (1 + 2)) && (instruction_set != kX86_64)) { return false; } diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index bd9c8b4b75..3b30cde0d4 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -113,6 +113,7 @@ RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) { case kHiddenArg: res_reg = rs_r12; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; case kCount: res_reg = RegStorage::InvalidReg(); break; + default: res_reg = RegStorage::InvalidReg(); } return res_reg; } diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index b287399900..ce9528632e 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -127,6 +127,7 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { case kHiddenArg: res_reg = rs_x12; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; case kCount: res_reg = RegStorage::InvalidReg(); break; + default: res_reg = RegStorage::InvalidReg(); } return res_reg; } diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 62c81d05bb..69ca7154e4 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -1959,7 +1959,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc switch (opcode) { case Instruction::NOT_LONG: - if (cu->instruction_set == kArm64) { + if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) { mir_to_lir->GenNotLong(rl_dest, rl_src2); return; } @@ -2009,7 +2009,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc break; case Instruction::DIV_LONG: case Instruction::DIV_LONG_2ADDR: - if (cu->instruction_set == kArm64) { + if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) { mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true); return; } @@ -2020,7 +2020,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc break; case Instruction::REM_LONG: case 
Instruction::REM_LONG_2ADDR: - if (cu->instruction_set == kArm64) { + if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) { mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false); return; } diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 842533b66b..ee68fe2561 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -290,26 +290,51 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> } LoadValueDirectWideFixed(arg1, r_tmp); } else { - RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); + RegStorage r_tmp; + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); + } LoadValueDirectWideFixed(arg1, r_tmp); } } } else { RegStorage r_tmp; if (arg0.fp) { - r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1)); + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1)); + } } else { - r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); + } } LoadValueDirectWideFixed(arg0, r_tmp); if (arg1.wide == 0) { - LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2)); + if (cu_->instruction_set == kX86_64) { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); + } else { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2)); + } } else { RegStorage r_tmp; if (arg1.fp) { - r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3)); + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3)); + } } else { - r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + } } LoadValueDirectWideFixed(arg1, r_tmp); } diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index 2c8b9b9adf..6ef793427c 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -391,24 +391,34 @@ RegLocation Mir2Lir::ForceTemp(RegLocation loc) { return loc; } -// FIXME: will need an update for 64-bit core regs. 
RegLocation Mir2Lir::ForceTempWide(RegLocation loc) { DCHECK(loc.wide); DCHECK(loc.location == kLocPhysReg); DCHECK(!loc.reg.IsFloat()); - if (IsTemp(loc.reg.GetLow())) { - Clobber(loc.reg.GetLow()); - } else { - RegStorage temp_low = AllocTemp(); - OpRegCopy(temp_low, loc.reg.GetLow()); - loc.reg.SetLowReg(temp_low.GetReg()); - } - if (IsTemp(loc.reg.GetHigh())) { - Clobber(loc.reg.GetHigh()); + + if (!loc.reg.IsPair()) { + if (IsTemp(loc.reg)) { + Clobber(loc.reg); + } else { + RegStorage temp = AllocTempWide(); + OpRegCopy(temp, loc.reg); + loc.reg = temp; + } } else { - RegStorage temp_high = AllocTemp(); - OpRegCopy(temp_high, loc.reg.GetHigh()); - loc.reg.SetHighReg(temp_high.GetReg()); + if (IsTemp(loc.reg.GetLow())) { + Clobber(loc.reg.GetLow()); + } else { + RegStorage temp_low = AllocTemp(); + OpRegCopy(temp_low, loc.reg.GetLow()); + loc.reg.SetLowReg(temp_low.GetReg()); + } + if (IsTemp(loc.reg.GetHigh())) { + Clobber(loc.reg.GetHigh()); + } else { + RegStorage temp_high = AllocTemp(); + OpRegCopy(temp_high, loc.reg.GetHigh()); + loc.reg.SetHighReg(temp_high.GetReg()); + } } // Ensure that this doesn't represent the original SR any more. diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index c1a7c990f0..381c7ce0aa 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -98,6 +98,7 @@ RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) { case kHiddenArg: res_reg = rs_rT0; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; case kCount: res_reg = rs_rMIPS_COUNT; break; + default: res_reg = RegStorage::InvalidReg(); } return res_reg; } diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 1f12b6fe69..a85be5e90c 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -68,20 +68,51 @@ void Mir2Lir::LockArg(int in_position, bool wide) { // TODO: needs revisit for 64-bit. RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) { - RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : - RegStorage::InvalidReg(); - int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { + + if (cu_->instruction_set == kX86) { /* * When doing a call for x86, it moves the stack pointer in order to push return. * Thus, we add another 4 bytes to figure out the out of caller (in of callee). - * TODO: This needs revisited for 64-bit. */ offset += sizeof(uint32_t); } + if (cu_->instruction_set == kX86_64) { + /* + * When doing a call for x86, it moves the stack pointer in order to push return. + * Thus, we add another 8 bytes to figure out the out of caller (in of callee). + */ + offset += sizeof(uint64_t); + } + + if (cu_->instruction_set == kX86_64) { + RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); + if (!reg_arg.Valid()) { + RegStorage new_reg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class); + LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32); + return new_reg; + } else { + // Check if we need to copy the arg to a different reg_class. 
+ if (!RegClassMatches(reg_class, reg_arg)) { + if (wide) { + RegStorage new_reg = AllocTypedTempWide(false, reg_class); + OpRegCopyWide(new_reg, reg_arg); + reg_arg = new_reg; + } else { + RegStorage new_reg = AllocTypedTemp(false, reg_class); + OpRegCopy(new_reg, reg_arg); + reg_arg = new_reg; + } + } + } + return reg_arg; + } + + RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); + RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : + RegStorage::InvalidReg(); + // If the VR is wide and there is no register for high part, we need to load it. if (wide && !reg_arg_high.Valid()) { // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg. @@ -129,15 +160,22 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { + if (cu_->instruction_set == kX86) { /* * When doing a call for x86, it moves the stack pointer in order to push return. * Thus, we add another 4 bytes to figure out the out of caller (in of callee). - * TODO: This needs revisited for 64-bit. */ offset += sizeof(uint32_t); } + if (cu_->instruction_set == kX86_64) { + /* + * When doing a call for x86, it moves the stack pointer in order to push return. + * Thus, we add another 8 bytes to figure out the out of caller (in of callee). + */ + offset += sizeof(uint64_t); + } + if (!rl_dest.wide) { RegStorage reg = GetArgMappingToPhysicalReg(in_position); if (reg.Valid()) { @@ -146,6 +184,16 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { Load32Disp(TargetReg(kSp), offset, rl_dest.reg); } } else { + if (cu_->instruction_set == kX86_64) { + RegStorage reg = GetArgMappingToPhysicalReg(in_position); + if (reg.Valid()) { + OpRegCopy(rl_dest.reg, reg); + } else { + LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64); + } + return; + } + RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1); diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index ed94a8d844..9718acde6c 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -910,13 +910,13 @@ class Mir2Lir : public Backend { void GenInvoke(CallInfo* info); void GenInvokeNoInline(CallInfo* info); virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); - int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, + virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, const MethodReference& target_method, uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, InvokeType type, bool skip_this); - int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, const MethodReference& target_method, uint32_t vtable_idx, diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 39a036560e..c3832969a4 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -317,6 +317,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, #undef UNARY_ENCODING_MAP { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA, { 0, 0, 0x99, 0, 0, 0, 0, 0 }, "Cdq", "" }, + 
{ kx86Cqo64Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA, { REX_W, 0, 0x99, 0, 0, 0, 0, 0 }, "Cqo", "" }, { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0, { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" }, { kX86Push32R, kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0, 0, 0, 0, 0 }, "Push32R", "!0r" }, { kX86Pop32R, kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD, { 0, 0, 0x58, 0, 0, 0, 0, 0 }, "Pop32R", "!0r" }, @@ -326,6 +327,11 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } +#define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \ +{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ +{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } + #define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \ { kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ @@ -341,8 +347,12 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING_MAP(Cvtsi2sd, 0xF2, 0x2A, REG_DEF0), EXT_0F_ENCODING_MAP(Cvtsi2ss, 0xF3, 0x2A, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2sd, 0xF2, 0x2A, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2ss, 0xF3, 0x2A, REG_DEF0), EXT_0F_ENCODING_MAP(Cvttsd2si, 0xF2, 0x2C, REG_DEF0), EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvttsd2sqi, 0xF2, 0x2C, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvttss2sqi, 0xF3, 0x2C, REG_DEF0), EXT_0F_ENCODING_MAP(Cvtsd2si, 0xF2, 0x2D, REG_DEF0), EXT_0F_ENCODING_MAP(Cvtss2si, 0xF3, 0x2D, REG_DEF0), EXT_0F_ENCODING_MAP(Ucomisd, 0x66, 0x2E, SETS_CCODES|REG_USE0), @@ -428,10 +438,19 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86MovhpsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" }, EXT_0F_ENCODING_MAP(Movdxr, 0x66, 0x6E, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Movqxr, 0x66, 0x6E, REG_DEF0), + { kX86MovqrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE1, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxRR", "!0r,!1r" }, + { kX86MovqrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxMR", "[!0r+!1d],!2r" }, + { kX86MovqrxAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" }, { kX86MovdrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" }, { kX86MovdrxAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, 
"MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovsxdRR, kRegReg, IS_BINARY_OP | REG_DEF0 | REG_USE1, { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRR", "!0r,!1r" }, + { kX86MovsxdRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRM", "!0r,[!1r+!2d]" }, + { kX86MovsxdRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE12, { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Set8R, kRegCond, IS_BINARY_OP | REG_DEF0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8R", "!1c !0r" }, { kX86Set8M, kMemCond, IS_STORE | IS_TERTIARY_OP | REG_USE0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" }, { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" }, @@ -442,6 +461,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), + EXT_0F_ENCODING_MAP(Imul64, REX_W, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" }, { kX86CmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" }, @@ -507,7 +527,7 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa } if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) { // BP requires an explicit displacement, even when it's 0. - if (entry->opcode != kX86Lea32RA) { + if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) { DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name; } size += IS_SIMM8(displacement) ? 1 : 4; @@ -676,7 +696,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kMacro: // lir operands - 0: reg DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + - ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, + ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], 0, 0, lir->operands[0], NO_REG, false) - // shorter ax encoding (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); @@ -1408,8 +1428,8 @@ void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(0x58 + RegStorage::RegNum(reg)); // pop reg - EmitRegImm(&X86Mir2Lir::EncodingMap[kX86Sub32RI], RegStorage::RegNum(reg), - offset + 5 /* size of call +0 */); + EmitRegImm(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], + RegStorage::RegNum(reg), offset + 5 /* size of call +0 */); } void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index fc0b305fc3..f5fce34f2b 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -86,11 +86,19 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, if (base_of_code_ != nullptr) { // We can use the saved value. 
RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } start_of_method_reg = rl_method.reg; store_method_addr_used_ = true; } else { - start_of_method_reg = AllocTemp(); + if (Gen64Bit()) { + start_of_method_reg = AllocTempWide(); + } else { + start_of_method_reg = AllocTemp(); + } NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg()); } int low_key = s4FromSwitchData(&table[2]); @@ -108,9 +116,14 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, // Load the displacement from the switch table RegStorage disp_reg = AllocTemp(); - NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), 2, WrapPointer(tab_rec)); + NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), + 2, WrapPointer(tab_rec)); // Add displacement to start of method - OpRegReg(kOpAdd, start_of_method_reg, disp_reg); + if (Gen64Bit()) { + NewLIR2(kX86Add64RR, start_of_method_reg.GetReg(), disp_reg.GetReg()); + } else { + OpRegReg(kOpAdd, start_of_method_reg, disp_reg); + } // ..and go! LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg()); tab_rec->anchor = switch_branch; @@ -150,13 +163,18 @@ void X86Mir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { if (base_of_code_ != nullptr) { // We can use the saved value. RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - LoadValueDirect(rl_method, rs_rX86_ARG2); + if (rl_method.wide) { + LoadValueDirectWide(rl_method, rs_rX86_ARG2); + } else { + LoadValueDirect(rl_method, rs_rX86_ARG2); + } store_method_addr_used_ = true; } else { + // TODO(64) force to be 64-bit NewLIR1(kX86StartOfMethod, rs_rX86_ARG2.GetReg()); } NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec)); - NewLIR2(kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg()); + NewLIR2(Gen64Bit() ? kX86Add64RR : kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg()); if (Is64BitInstructionSet(cu_->instruction_set)) { CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0, rs_rX86_ARG1, true); @@ -264,9 +282,10 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>()); } LIR* branch = OpCondBranch(kCondUlt, nullptr); - AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, - frame_size_ - - GetInstructionSetPointerSize(cu_->instruction_set))); + AddSlowPath( + new(arena_)StackOverflowSlowPath(this, branch, + frame_size_ - + GetInstructionSetPointerSize(cu_->instruction_set))); } FlushIns(ArgLocs, rl_method); @@ -276,7 +295,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rs_rX86_ARG0.GetReg()); int displacement = SRegOffset(base_of_code_->s_reg_low); // Native pointer - must be natural word size. - setup_method_address_[1] = StoreWordDisp(rs_rX86_SP, displacement, rs_rX86_ARG0); + setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, rs_rX86_ARG0, Gen64Bit() ? 
k64 : k32); } FreeTemp(rs_rX86_ARG0); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 648c148c15..38d60d2b54 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -20,9 +20,43 @@ #include "dex/compiler_internals.h" #include "x86_lir.h" +#include <map> + namespace art { class X86Mir2Lir : public Mir2Lir { + protected: + class InToRegStorageMapper { + public: + virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0; + virtual ~InToRegStorageMapper() {} + }; + + class InToRegStorageX86_64Mapper : public InToRegStorageMapper { + public: + InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {} + virtual ~InToRegStorageX86_64Mapper() {} + virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide); + private: + int cur_core_reg_; + int cur_fp_reg_; + }; + + class InToRegStorageMapping { + public: + InToRegStorageMapping() : initialized_(false) {} + void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); + int GetMaxMappedIn() { return max_mapped_in_; } + bool IsThereStackMapped() { return is_there_stack_mapped_; } + RegStorage Get(int in_position); + bool IsInitialized() { return initialized_; } + private: + std::map<int, RegStorage> mapping_; + int max_mapped_in_; + bool is_there_stack_mapped_; + bool initialized_; + }; + public: X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit); @@ -56,6 +90,7 @@ class X86Mir2Lir : public Mir2Lir { // Required for target - register utilities. RegStorage TargetReg(SpecialTargetRegister reg); RegStorage GetArgMappingToPhysicalReg(int arg_num); + RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); @@ -151,22 +186,25 @@ class X86Mir2Lir : public Mir2Lir { void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); /* * @brief Generate a two address long operation with a constant value * @param rl_dest location of result * @param rl_src constant source operand * @param op Opcode to be generated + * @return success or not */ - void GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); + bool GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); /* * @brief Generate a three address long operation with a constant value * @param rl_dest location of result * @param rl_src1 source operand * @param rl_src2 constant source operand * @param op Opcode to be generated + * @return success or not */ - void GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + bool GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, Instruction::Code op); /** @@ -222,6 +260,9 @@ class X86Mir2Lir : public Mir2Lir { bool can_assume_type_is_in_dex_cache, uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src); + void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift); + // Single operation generators. 
LIR* OpUnconditionalBranch(LIR* target); LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target); @@ -306,6 +347,22 @@ class X86Mir2Lir : public Mir2Lir { */ void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg); + void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); + + int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + + int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + /* * @brief Generate a relative call to the method that will be patched at link time. * @param target_method The MethodReference of the method to be invoked. @@ -794,6 +851,8 @@ class X86Mir2Lir : public Mir2Lir { * @param mir A kMirOpConst128b MIR instruction to match. */ LIR *AddVectorLiteral(MIR *mir); + + InToRegStorageMapping in_to_reg_storage_mapping_; }; } // namespace art diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index 0421a5967a..c3580f76ae 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -272,21 +272,67 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, return; } case Instruction::LONG_TO_DOUBLE: + if (Gen64Bit()) { + rcSrc = kCoreReg; + op = kX86Cvtsqi2sdRR; + break; + } GenLongToFP(rl_dest, rl_src, true /* is_double */); return; case Instruction::LONG_TO_FLOAT: + if (Gen64Bit()) { + rcSrc = kCoreReg; + op = kX86Cvtsqi2ssRR; + break; + } GenLongToFP(rl_dest, rl_src, false /* is_double */); return; case Instruction::FLOAT_TO_LONG: - if (Is64BitInstructionSet(cu_->instruction_set)) { - GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pF2l), rl_dest, rl_src); + if (Gen64Bit()) { + rl_src = LoadValue(rl_src, kFPReg); + // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage temp_reg = AllocTempSingle(); + + // Set 0x7fffffffffffffff to rl_result + LoadConstantWide(rl_result.reg, 0x7fffffffffffffff); + NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg()); + NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg()); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); + NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + LIR* branch_normal = NewLIR1(kX86Jmp8, 0); + branch_na_n->target = NewLIR0(kPseudoTargetLabel); + NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); + branch_normal->target = NewLIR0(kPseudoTargetLabel); + StoreValueWide(rl_dest, rl_result); } else { GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src); } return; case Instruction::DOUBLE_TO_LONG: - if (Is64BitInstructionSet(cu_->instruction_set)) { - GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pD2l), rl_dest, rl_src); + if (Gen64Bit()) { + rl_src = LoadValueWide(rl_src, kFPReg); + // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage temp_reg = 
AllocTempDouble(); + + // Set 0x7fffffffffffffff to rl_result + LoadConstantWide(rl_result.reg, 0x7fffffffffffffff); + NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg()); + NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg()); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); + NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + LIR* branch_normal = NewLIR1(kX86Jmp8, 0); + branch_na_n->target = NewLIR0(kPseudoTargetLabel); + NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); + branch_normal->target = NewLIR0(kPseudoTargetLabel); + StoreValueWide(rl_dest, rl_result); } else { GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src); } @@ -434,9 +480,14 @@ void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) { void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { RegLocation rl_result; rl_src = LoadValueWide(rl_src, kCoreReg); - rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000); - OpRegCopy(rl_result.reg, rl_src.reg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + if (Gen64Bit()) { + LoadConstantWide(rl_result.reg, 0x8000000000000000); + OpRegReg(kOpAdd, rl_result.reg, rl_src.reg); + } else { + OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000); + OpRegCopy(rl_result.reg, rl_src.reg); + } StoreValueWide(rl_dest, rl_result); } diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 1cc16b9e12..d214b8de7b 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -31,6 +31,23 @@ namespace art { */ void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + if (Gen64Bit()) { + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegReg(kOpXor, rl_result.reg, rl_result.reg); // result = 0 + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); + NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondNe); // result = (src1 != src2) ? 1 : result + RegStorage temp_reg = AllocTemp(); + OpRegReg(kOpNeg, temp_reg, rl_result.reg); + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); + // result = (src1 < src2) ? -result : result + OpCondRegReg(kOpCmov, kCondLt, rl_result.reg, temp_reg); + StoreValue(rl_dest, rl_result); + FreeTemp(temp_reg); + return; + } + FlushAllRegs(); LockCallTemps(); // Prepare for explicit register usage RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1); @@ -108,7 +125,7 @@ LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { } if (r_dest.IsFloat() || r_src.IsFloat()) return OpFpRegCopy(r_dest, r_src); - LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR, + LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR, r_dest.GetReg(), r_src.GetReg()); if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { res->flags.is_nop = true; @@ -133,36 +150,51 @@ void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { } else { // TODO: Prevent this from happening in the code. The result is often // unused or could have been loaded more easily from memory. 
- NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg()); - RegStorage r_tmp = AllocTempDouble(); - NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg()); - NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg()); - FreeTemp(r_tmp); + if (!r_src.IsPair()) { + DCHECK(!r_dest.IsPair()); + NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg()); + } else { + NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg()); + RegStorage r_tmp = AllocTempDouble(); + NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg()); + NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg()); + FreeTemp(r_tmp); + } } } else { if (src_fp) { - NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg()); - RegStorage temp_reg = AllocTempDouble(); - NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg()); - NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32); - NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg()); + if (!r_dest.IsPair()) { + DCHECK(!r_src.IsPair()); + NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg()); + } else { + NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg()); + RegStorage temp_reg = AllocTempDouble(); + NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg()); + NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32); + NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg()); + } } else { - DCHECK(r_dest.IsPair()); - DCHECK(r_src.IsPair()); - // Handle overlap - if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) { - // Deal with cycles. - RegStorage temp_reg = AllocTemp(); - OpRegCopy(temp_reg, r_dest.GetHigh()); - OpRegCopy(r_dest.GetHigh(), r_dest.GetLow()); - OpRegCopy(r_dest.GetLow(), temp_reg); - FreeTemp(temp_reg); - } else if (r_src.GetHighReg() == r_dest.GetLowReg()) { - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); - OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + DCHECK_EQ(r_dest.IsPair(), r_src.IsPair()); + if (!r_src.IsPair()) { + // Just copy the register directly. + OpRegCopy(r_dest, r_src); } else { - OpRegCopy(r_dest.GetLow(), r_src.GetLow()); - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + // Handle overlap + if (r_src.GetHighReg() == r_dest.GetLowReg() && + r_src.GetLowReg() == r_dest.GetHighReg()) { + // Deal with cycles. + RegStorage temp_reg = AllocTemp(); + OpRegCopy(temp_reg, r_dest.GetHigh()); + OpRegCopy(r_dest.GetHigh(), r_dest.GetLow()); + OpRegCopy(r_dest.GetLow(), temp_reg); + FreeTemp(temp_reg); + } else if (r_src.GetHighReg() == r_dest.GetLowReg()) { + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + } else { + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + } } } } @@ -832,7 +864,11 @@ LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { // Address the start of the method RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - LoadValueDirectFixed(rl_method, reg); + if (rl_method.wide) { + LoadValueDirectWideFixed(rl_method, reg); + } else { + LoadValueDirectFixed(rl_method, reg); + } store_method_addr_used_ = true; // Load the proper value from the literal area. @@ -871,18 +907,23 @@ void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, } void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) { - DCHECK(reg.IsPair()); // TODO: allow 64BitSolo. - // We are not supposed to clobber the incoming storage, so allocate a temporary. - RegStorage t_reg = AllocTemp(); + if (Gen64Bit()) { + DCHECK(reg.Is64Bit()); - // Doing an OR is a quick way to check if both registers are zero. 
This will set the flags. - OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh()); + NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0); + } else { + DCHECK(reg.IsPair()); + + // We are not supposed to clobber the incoming storage, so allocate a temporary. + RegStorage t_reg = AllocTemp(); + // Doing an OR is a quick way to check if both registers are zero. This will set the flags. + OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh()); + // The temp is no longer needed so free it at this time. + FreeTemp(t_reg); + } // In case of zero, throw ArithmeticException. GenDivZeroCheck(kCondEq); - - // The temp is no longer needed so free it at this time. - FreeTemp(t_reg); } void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index, @@ -1221,18 +1262,22 @@ void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, if (rl_src.location == kLocPhysReg) { // Both operands are in registers. // But we must ensure that rl_src is in pair - rl_src = LoadValueWide(rl_src, kCoreReg); - if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) { - // The registers are the same, so we would clobber it before the use. - RegStorage temp_reg = AllocTemp(); - OpRegCopy(temp_reg, rl_dest.reg); - rl_src.reg.SetHighReg(temp_reg.GetReg()); - } - NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg()); + if (Gen64Bit()) { + NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg()); + } else { + rl_src = LoadValueWide(rl_src, kCoreReg); + if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) { + // The registers are the same, so we would clobber it before the use. + RegStorage temp_reg = AllocTemp(); + OpRegCopy(temp_reg, rl_dest.reg); + rl_src.reg.SetHighReg(temp_reg.GetReg()); + } + NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg()); - x86op = GetOpcode(op, rl_dest, rl_src, true); - NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg()); - FreeTemp(rl_src.reg); + x86op = GetOpcode(op, rl_dest, rl_src, true); + NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg()); + FreeTemp(rl_src.reg); // ??? + } return; } @@ -1242,11 +1287,13 @@ void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_src.s_reg_low); - LIR *lir = NewLIR3(x86op, rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET); + LIR *lir = NewLIR3(x86op, Gen64Bit() ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); - x86op = GetOpcode(op, rl_dest, rl_src, true); - lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET); + if (!Gen64Bit()) { + x86op = GetOpcode(op, rl_dest, rl_src, true); + lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET); + } AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); } @@ -1273,13 +1320,16 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instructi int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); - LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, rl_src.reg.GetLowReg()); + LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, + Gen64Bit() ? 
rl_src.reg.GetReg() : rl_src.reg.GetLowReg()); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); - x86op = GetOpcode(op, rl_dest, rl_src, true); - lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg()); + if (!Gen64Bit()) { + x86op = GetOpcode(op, rl_dest, rl_src, true); + lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg()); + } AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, @@ -1330,23 +1380,44 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1, // Get one of the source operands into temporary register. rl_src1 = LoadValueWide(rl_src1, kCoreReg); - if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { - GenLongRegOrMemOp(rl_src1, rl_src2, op); - } else if (is_commutative) { - rl_src2 = LoadValueWide(rl_src2, kCoreReg); - // We need at least one of them to be a temporary. - if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) { - rl_src1 = ForceTempWide(rl_src1); + if (Gen64Bit()) { + if (IsTemp(rl_src1.reg)) { GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else if (is_commutative) { + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + // We need at least one of them to be a temporary. + if (!IsTemp(rl_src2.reg)) { + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else { + GenLongRegOrMemOp(rl_src2, rl_src1, op); + StoreFinalValueWide(rl_dest, rl_src2); + return; + } } else { - GenLongRegOrMemOp(rl_src2, rl_src1, op); - StoreFinalValueWide(rl_dest, rl_src2); - return; + // Need LHS to be the temp. + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); } } else { - // Need LHS to be the temp. - rl_src1 = ForceTempWide(rl_src1); - GenLongRegOrMemOp(rl_src1, rl_src2, op); + if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else if (is_commutative) { + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + // We need at least one of them to be a temporary. + if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) { + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else { + GenLongRegOrMemOp(rl_src2, rl_src1, op); + StoreFinalValueWide(rl_dest, rl_src2); + return; + } + } else { + // Need LHS to be the temp. 
+ rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } } StoreFinalValueWide(rl_dest, rl_src1); @@ -1378,27 +1449,91 @@ void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, } void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { - LOG(FATAL) << "Unexpected use GenNotLong()"; + if (Gen64Bit()) { + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_result; + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegCopy(rl_result.reg, rl_src.reg); + OpReg(kOpNot, rl_result.reg); + StoreValueWide(rl_dest, rl_result); + } else { + LOG(FATAL) << "Unexpected use GenNotLong()"; + } } void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div) { - LOG(FATAL) << "Unexpected use GenDivRemLong()"; + if (!Gen64Bit()) { + LOG(FATAL) << "Unexpected use GenDivRemLong()"; + return; + } + + // We have to use fixed registers, so flush all the temps. + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage. + + // Load LHS into RAX. + LoadValueDirectWideFixed(rl_src1, rs_r0q); + + // Load RHS into RCX. + LoadValueDirectWideFixed(rl_src2, rs_r1q); + + // Copy LHS sign bit into RDX. + NewLIR0(kx86Cqo64Da); + + // Handle division by zero case. + GenDivZeroCheckWide(rs_r1q); + + // Have to catch 0x8000000000000000/-1 case, or we will get an exception! + NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1); + LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + + // RHS is -1. + LoadConstantWide(rs_r3q, 0x8000000000000000); + NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r3q.GetReg()); + LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + + // In 0x8000000000000000/-1 case. + if (!is_div) { + // For DIV, RAX is already right. For REM, we need RDX 0. + NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg()); + } + LIR* done = NewLIR1(kX86Jmp8, 0); + + // Expected case. + minus_one_branch->target = NewLIR0(kPseudoTargetLabel); + minint_branch->target = minus_one_branch->target; + NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg()); + done->target = NewLIR0(kPseudoTargetLabel); + + // Result is in RAX for div and RDX for rem. + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG}; + if (!is_div) { + rl_result.reg.SetReg(r2q); + } + + StoreValueWide(rl_dest, rl_result); } void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValueWide(rl_src, kCoreReg); - RegLocation rl_result = ForceTempWide(rl_src); - if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) && - ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) { - // The registers are the same, so we would clobber it before the use. - RegStorage temp_reg = AllocTemp(); - OpRegCopy(temp_reg, rl_result.reg); - rl_result.reg.SetHighReg(temp_reg.GetReg()); + RegLocation rl_result; + if (Gen64Bit()) { + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegReg(kOpNeg, rl_result.reg, rl_src.reg); + } else { + rl_result = ForceTempWide(rl_src); + if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) && + ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) { + // The registers are the same, so we would clobber it before the use. 
+ RegStorage temp_reg = AllocTemp(); + OpRegCopy(temp_reg, rl_result.reg); + rl_result.reg.SetHighReg(temp_reg.GetReg()); + } + OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow + OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF + OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh } - OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow - OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF - OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh StoreValueWide(rl_dest, rl_result); } @@ -1551,60 +1686,84 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, int shift_amount) { RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); - switch (opcode) { - case Instruction::SHL_LONG: - case Instruction::SHL_LONG_2ADDR: - DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); - LoadConstant(rl_result.reg.GetLow(), 0); - } else if (shift_amount > 31) { - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); - NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); - LoadConstant(rl_result.reg.GetLow(), 0); - } else { - OpRegCopy(rl_result.reg, rl_src.reg); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), shift_amount); - NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); - } - break; - case Instruction::SHR_LONG: - case Instruction::SHR_LONG_2ADDR: - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); - } else if (shift_amount > 31) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); - NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); - } else { - OpRegCopy(rl_result.reg, rl_src.reg); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount); - NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); - } - break; - case Instruction::USHR_LONG: - case Instruction::USHR_LONG_2ADDR: - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - LoadConstant(rl_result.reg.GetHigh(), 0); - } else if (shift_amount > 31) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); - LoadConstant(rl_result.reg.GetHigh(), 0); - } else { - OpRegCopy(rl_result.reg, rl_src.reg); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount); - NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); - } - break; - default: - LOG(FATAL) << "Unexpected case"; + if (Gen64Bit()) { + OpKind op = static_cast<OpKind>(0); /* Make gcc happy */ + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + op = kOpLsl; + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + op = 
kOpAsr; + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + op = kOpLsr; + break; + default: + LOG(FATAL) << "Unexpected case"; + } + OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount); + } else { + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); + LoadConstant(rl_result.reg.GetLow(), 0); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); + NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); + LoadConstant(rl_result.reg.GetLow(), 0); + } else { + OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), + shift_amount); + NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); + } + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); + NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); + } else { + OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), + shift_amount); + NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); + } + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + LoadConstant(rl_result.reg.GetHigh(), 0); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); + LoadConstant(rl_result.reg.GetHigh(), 0); + } else { + OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), + shift_amount); + NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); + } + break; + default: + LOG(FATAL) << "Unexpected case"; + } } return rl_result; } @@ -1634,24 +1793,26 @@ void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + bool isConstSuccess = false; switch (opcode) { case Instruction::ADD_LONG: case Instruction::AND_LONG: case Instruction::OR_LONG: case Instruction::XOR_LONG: if (rl_src2.is_const) { - GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } else { DCHECK(rl_src1.is_const); - GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); } break; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (rl_src2.is_const) { - GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } else { GenSubLong(opcode, 
rl_dest, rl_src1, rl_src2); + isConstSuccess = true; } break; case Instruction::ADD_LONG_2ADDR: @@ -1660,20 +1821,24 @@ void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, case Instruction::AND_LONG_2ADDR: if (rl_src2.is_const) { if (GenerateTwoOperandInstructions()) { - GenLongImm(rl_dest, rl_src2, opcode); + isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode); } else { - GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } } else { DCHECK(rl_src1.is_const); - GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); } break; default: - // Default - bail to non-const handler. - GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + isConstSuccess = false; break; } + + if (!isConstSuccess) { + // Default - bail to non-const handler. + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + } } bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) { @@ -1695,40 +1860,50 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocat bool is_high_op) { bool rhs_in_mem = rhs.location != kLocPhysReg; bool dest_in_mem = dest.location != kLocPhysReg; + bool is64Bit = Gen64Bit(); DCHECK(!rhs_in_mem || !dest_in_mem); switch (op) { case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: if (dest_in_mem) { - return is_high_op ? kX86Adc32MR : kX86Add32MR; + return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR; } else if (rhs_in_mem) { - return is_high_op ? kX86Adc32RM : kX86Add32RM; + return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM; } - return is_high_op ? kX86Adc32RR : kX86Add32RR; + return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (dest_in_mem) { - return is_high_op ? kX86Sbb32MR : kX86Sub32MR; + return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR; } else if (rhs_in_mem) { - return is_high_op ? kX86Sbb32RM : kX86Sub32RM; + return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM; } - return is_high_op ? kX86Sbb32RR : kX86Sub32RR; + return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: if (dest_in_mem) { - return kX86And32MR; + return is64Bit ? kX86And64MR : kX86And32MR; + } + if (is64Bit) { + return rhs_in_mem ? kX86And64RM : kX86And64RR; } return rhs_in_mem ? kX86And32RM : kX86And32RR; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: if (dest_in_mem) { - return kX86Or32MR; + return is64Bit ? kX86Or64MR : kX86Or32MR; + } + if (is64Bit) { + return rhs_in_mem ? kX86Or64RM : kX86Or64RR; } return rhs_in_mem ? kX86Or32RM : kX86Or32RR; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: if (dest_in_mem) { - return kX86Xor32MR; + return is64Bit ? kX86Xor64MR : kX86Xor32MR; + } + if (is64Bit) { + return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR; } return rhs_in_mem ? 
kX86Xor32RM : kX86Xor32RR; default: @@ -1740,6 +1915,7 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocat X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value) { bool in_mem = loc.location != kLocPhysReg; + bool is64Bit = Gen64Bit(); bool byte_imm = IS_SIMM8(value); DCHECK(in_mem || !loc.reg.IsFloat()); switch (op) { @@ -1747,43 +1923,61 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_h case Instruction::ADD_LONG_2ADDR: if (byte_imm) { if (in_mem) { - return is_high_op ? kX86Adc32MI8 : kX86Add32MI8; + return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8; } - return is_high_op ? kX86Adc32RI8 : kX86Add32RI8; + return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8; } if (in_mem) { - return is_high_op ? kX86Adc32MI : kX86Add32MI; + return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI; } - return is_high_op ? kX86Adc32RI : kX86Add32RI; + return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (byte_imm) { if (in_mem) { - return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; + return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; } - return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; + return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; } if (in_mem) { - return is_high_op ? kX86Sbb32MI : kX86Sub32MI; + return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI; } - return is_high_op ? kX86Sbb32RI : kX86Sub32RI; + return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: if (byte_imm) { + if (is64Bit) { + return in_mem ? kX86And64MI8 : kX86And64RI8; + } return in_mem ? kX86And32MI8 : kX86And32RI8; } + if (is64Bit) { + return in_mem ? kX86And64MI : kX86And64RI; + } return in_mem ? kX86And32MI : kX86And32RI; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: if (byte_imm) { + if (is64Bit) { + return in_mem ? kX86Or64MI8 : kX86Or64RI8; + } return in_mem ? kX86Or32MI8 : kX86Or32RI8; } + if (is64Bit) { + return in_mem ? kX86Or64MI : kX86Or64RI; + } return in_mem ? kX86Or32MI : kX86Or32RI; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: if (byte_imm) { + if (is64Bit) { + return in_mem ? kX86Xor64MI8 : kX86Xor64RI8; + } return in_mem ? kX86Xor32MI8 : kX86Xor32RI8; } + if (is64Bit) { + return in_mem ? kX86Xor64MI : kX86Xor64RI; + } return in_mem ? 
kX86Xor32MI : kX86Xor32RI; default: LOG(FATAL) << "Unexpected opcode: " << op; @@ -1791,9 +1985,43 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_h } } -void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { +bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { DCHECK(rl_src.is_const); int64_t val = mir_graph_->ConstantValueWide(rl_src); + + if (Gen64Bit()) { + // We can do with imm only if it fits 32 bit + if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { + return false; + } + + rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); + + if ((rl_dest.location == kLocDalvikFrame) || + (rl_dest.location == kLocCompilerTemp)) { + int r_base = TargetReg(kSp).GetReg(); + int displacement = SRegOffset(rl_dest.s_reg_low); + + X86OpCode x86op = GetOpcode(op, rl_dest, false, val); + LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val); + AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is64bit */); + AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, + false /* is_load */, true /* is64bit */); + return true; + } + + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); + DCHECK_EQ(rl_result.location, kLocPhysReg); + DCHECK(!rl_result.reg.IsFloat()); + + X86OpCode x86op = GetOpcode(op, rl_result, false, val); + NewLIR2(x86op, rl_result.reg.GetReg(), val); + + StoreValueWide(rl_dest, rl_result); + return true; + } + int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); @@ -1820,7 +2048,7 @@ void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); } - return; + return true; } RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); @@ -1836,12 +2064,38 @@ void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); } StoreValueWide(rl_dest, rl_result); + return true; } -void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, +bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, Instruction::Code op) { DCHECK(rl_src2.is_const); int64_t val = mir_graph_->ConstantValueWide(rl_src2); + + if (Gen64Bit()) { + // We can do with imm only if it fits 32 bit + if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { + return false; + } + if (rl_dest.location == kLocPhysReg && + rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) { + X86OpCode x86op = GetOpcode(op, rl_dest, false, val); + NewLIR2(x86op, rl_dest.reg.GetReg(), val); + StoreFinalValueWide(rl_dest, rl_dest); + return true; + } + + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + // We need the values to be in a temporary + RegLocation rl_result = ForceTempWide(rl_src1); + + X86OpCode x86op = GetOpcode(op, rl_result, false, val); + NewLIR2(x86op, rl_result.reg.GetReg(), val); + + StoreFinalValueWide(rl_dest, rl_result); + return true; + } + int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); @@ -1861,7 +2115,7 @@ void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, } StoreFinalValueWide(rl_dest, rl_dest); - return; + return true; } rl_src1 = LoadValueWide(rl_src1, kCoreReg); @@ -1879,6 +2133,7 @@ void 
X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, } StoreFinalValueWide(rl_dest, rl_result); + return true; } // For final classes there are no sub-classes to check and so we can answer the instance-of @@ -2239,7 +2494,8 @@ void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, // We should be careful with order here // If rl_dest and rl_lhs points to the same VR we should load first // If the are different we should find a register first for dest - if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) { + if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == + mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) { rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); // No-op if these are the same. @@ -2289,4 +2545,82 @@ bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_ // Everything will be fine :-). return true; } + +void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { + if (!Gen64Bit()) { + Mir2Lir::GenIntToLong(rl_dest, rl_src); + return; + } + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (rl_src.location == kLocPhysReg) { + NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + } else { + int displacement = SRegOffset(rl_src.s_reg_low); + LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(), + displacement + LOWORD_OFFSET); + AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is_64bit */); + } + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift) { + if (!Gen64Bit()) { + Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift); + return; + } + + bool is_two_addr = false; + OpKind op = kOpBkpt; + RegLocation rl_result; + + switch (opcode) { + case Instruction::SHL_LONG_2ADDR: + is_two_addr = true; + // Fallthrough + case Instruction::SHL_LONG: + op = kOpLsl; + break; + case Instruction::SHR_LONG_2ADDR: + is_two_addr = true; + // Fallthrough + case Instruction::SHR_LONG: + op = kOpAsr; + break; + case Instruction::USHR_LONG_2ADDR: + is_two_addr = true; + // Fallthrough + case Instruction::USHR_LONG: + op = kOpLsr; + break; + default: + op = kOpBkpt; + } + + // X86 doesn't require masking and must use ECX. + RegStorage t_reg = TargetReg(kCount); // rCX + LoadValueDirectFixed(rl_shift, t_reg); + if (is_two_addr) { + // Can we do this directly into memory? + rl_result = UpdateLocWideTyped(rl_dest, kCoreReg); + if (rl_result.location != kLocPhysReg) { + // Okay, we can do this into memory + OpMemReg(op, rl_result, t_reg.GetReg()); + } else if (!rl_result.reg.IsFloat()) { + // Can do this directly into the result register + OpRegReg(op, rl_result.reg, t_reg); + StoreFinalValueWide(rl_dest, rl_result); + } + } else { + // Three address form, or we can't do directly. 
+ rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg); + StoreFinalValueWide(rl_dest, rl_result); + } + + FreeTemp(t_reg); +} + } // namespace art diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 4d8fd1b283..1ac15a21d8 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -132,10 +132,18 @@ X86NativeRegisterPool rX86_ARG0; X86NativeRegisterPool rX86_ARG1; X86NativeRegisterPool rX86_ARG2; X86NativeRegisterPool rX86_ARG3; +#ifdef TARGET_REX_SUPPORT +X86NativeRegisterPool rX86_ARG4; +X86NativeRegisterPool rX86_ARG5; +#endif X86NativeRegisterPool rX86_FARG0; X86NativeRegisterPool rX86_FARG1; X86NativeRegisterPool rX86_FARG2; X86NativeRegisterPool rX86_FARG3; +X86NativeRegisterPool rX86_FARG4; +X86NativeRegisterPool rX86_FARG5; +X86NativeRegisterPool rX86_FARG6; +X86NativeRegisterPool rX86_FARG7; X86NativeRegisterPool rX86_RET0; X86NativeRegisterPool rX86_RET1; X86NativeRegisterPool rX86_INVOKE_TGT; @@ -145,10 +153,16 @@ RegStorage rs_rX86_ARG0; RegStorage rs_rX86_ARG1; RegStorage rs_rX86_ARG2; RegStorage rs_rX86_ARG3; +RegStorage rs_rX86_ARG4; +RegStorage rs_rX86_ARG5; RegStorage rs_rX86_FARG0; RegStorage rs_rX86_FARG1; RegStorage rs_rX86_FARG2; RegStorage rs_rX86_FARG3; +RegStorage rs_rX86_FARG4; +RegStorage rs_rX86_FARG5; +RegStorage rs_rX86_FARG6; +RegStorage rs_rX86_FARG7; RegStorage rs_rX86_RET0; RegStorage rs_rX86_RET1; RegStorage rs_rX86_INVOKE_TGT; @@ -164,7 +178,7 @@ RegLocation X86Mir2Lir::LocCReturnRef() { } RegLocation X86Mir2Lir::LocCReturnWide() { - return x86_loc_c_return_wide; + return Gen64Bit() ? x86_64_loc_c_return_wide : x86_loc_c_return_wide; } RegLocation X86Mir2Lir::LocCReturnFloat() { @@ -188,35 +202,27 @@ RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) { case kArg1: res_reg = rs_rX86_ARG1; break; case kArg2: res_reg = rs_rX86_ARG2; break; case kArg3: res_reg = rs_rX86_ARG3; break; + case kArg4: res_reg = rs_rX86_ARG4; break; + case kArg5: res_reg = rs_rX86_ARG5; break; case kFArg0: res_reg = rs_rX86_FARG0; break; case kFArg1: res_reg = rs_rX86_FARG1; break; case kFArg2: res_reg = rs_rX86_FARG2; break; case kFArg3: res_reg = rs_rX86_FARG3; break; + case kFArg4: res_reg = rs_rX86_FARG4; break; + case kFArg5: res_reg = rs_rX86_FARG5; break; + case kFArg6: res_reg = rs_rX86_FARG6; break; + case kFArg7: res_reg = rs_rX86_FARG7; break; case kRet0: res_reg = rs_rX86_RET0; break; case kRet1: res_reg = rs_rX86_RET1; break; case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break; case kHiddenArg: res_reg = rs_rAX; break; case kHiddenFpArg: res_reg = rs_fr0; break; case kCount: res_reg = rs_rX86_COUNT; break; + default: res_reg = RegStorage::InvalidReg(); } return res_reg; } -RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - // For the 32-bit internal ABI, the first 3 arguments are passed in registers. - // TODO: This is not 64-bit compliant and depends on new internal ABI. - switch (arg_num) { - case 0: - return rs_rX86_ARG1; - case 1: - return rs_rX86_ARG2; - case 2: - return rs_rX86_ARG3; - default: - return RegStorage::InvalidReg(); - } -} - /* * Decode the register id. 
*/ @@ -482,6 +488,20 @@ void X86Mir2Lir::LockCallTemps() { LockTemp(rs_rX86_ARG1); LockTemp(rs_rX86_ARG2); LockTemp(rs_rX86_ARG3); +#ifdef TARGET_REX_SUPPORT + if (Gen64Bit()) { + LockTemp(rs_rX86_ARG4); + LockTemp(rs_rX86_ARG5); + LockTemp(rs_rX86_FARG0); + LockTemp(rs_rX86_FARG1); + LockTemp(rs_rX86_FARG2); + LockTemp(rs_rX86_FARG3); + LockTemp(rs_rX86_FARG4); + LockTemp(rs_rX86_FARG5); + LockTemp(rs_rX86_FARG6); + LockTemp(rs_rX86_FARG7); + } +#endif } /* To be used when explicitly managing register use */ @@ -490,6 +510,20 @@ void X86Mir2Lir::FreeCallTemps() { FreeTemp(rs_rX86_ARG1); FreeTemp(rs_rX86_ARG2); FreeTemp(rs_rX86_ARG3); +#ifdef TARGET_REX_SUPPORT + if (Gen64Bit()) { + FreeTemp(rs_rX86_ARG4); + FreeTemp(rs_rX86_ARG5); + FreeTemp(rs_rX86_FARG0); + FreeTemp(rs_rX86_FARG1); + FreeTemp(rs_rX86_FARG2); + FreeTemp(rs_rX86_FARG3); + FreeTemp(rs_rX86_FARG4); + FreeTemp(rs_rX86_FARG5); + FreeTemp(rs_rX86_FARG6); + FreeTemp(rs_rX86_FARG7); + } +#endif } bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) { @@ -653,6 +687,14 @@ bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) { } RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) { + // X86_64 can handle any size. + if (Gen64Bit()) { + if (size == kReference) { + return kRefReg; + } + return kCoreReg; + } + if (UNLIKELY(is_volatile)) { // On x86, atomic 64-bit load/store requires an fp register. // Smaller aligned load/store is atomic for both core and fp registers. @@ -688,11 +730,37 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* rs_rX86_ARG1 = rs_rSI; rs_rX86_ARG2 = rs_rDX; rs_rX86_ARG3 = rs_rCX; +#ifdef TARGET_REX_SUPPORT + rs_rX86_ARG4 = rs_r8; + rs_rX86_ARG5 = rs_r9; +#else + rs_rX86_ARG4 = RegStorage::InvalidReg(); + rs_rX86_ARG5 = RegStorage::InvalidReg(); +#endif + rs_rX86_FARG0 = rs_fr0; + rs_rX86_FARG1 = rs_fr1; + rs_rX86_FARG2 = rs_fr2; + rs_rX86_FARG3 = rs_fr3; + rs_rX86_FARG4 = rs_fr4; + rs_rX86_FARG5 = rs_fr5; + rs_rX86_FARG6 = rs_fr6; + rs_rX86_FARG7 = rs_fr7; rX86_ARG0 = rDI; rX86_ARG1 = rSI; rX86_ARG2 = rDX; rX86_ARG3 = rCX; - // TODO: ARG4(r8), ARG5(r9), floating point args. 
+#ifdef TARGET_REX_SUPPORT + rX86_ARG4 = r8; + rX86_ARG5 = r9; +#endif + rX86_FARG0 = fr0; + rX86_FARG1 = fr1; + rX86_FARG2 = fr2; + rX86_FARG3 = fr3; + rX86_FARG4 = fr4; + rX86_FARG5 = fr5; + rX86_FARG6 = fr6; + rX86_FARG7 = fr7; } else { rs_rX86_SP = rs_rX86_SP_32; @@ -700,23 +768,32 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* rs_rX86_ARG1 = rs_rCX; rs_rX86_ARG2 = rs_rDX; rs_rX86_ARG3 = rs_rBX; + rs_rX86_ARG4 = RegStorage::InvalidReg(); + rs_rX86_ARG5 = RegStorage::InvalidReg(); + rs_rX86_FARG0 = rs_rAX; + rs_rX86_FARG1 = rs_rCX; + rs_rX86_FARG2 = rs_rDX; + rs_rX86_FARG3 = rs_rBX; + rs_rX86_FARG4 = RegStorage::InvalidReg(); + rs_rX86_FARG5 = RegStorage::InvalidReg(); + rs_rX86_FARG6 = RegStorage::InvalidReg(); + rs_rX86_FARG7 = RegStorage::InvalidReg(); rX86_ARG0 = rAX; rX86_ARG1 = rCX; rX86_ARG2 = rDX; rX86_ARG3 = rBX; + rX86_FARG0 = rAX; + rX86_FARG1 = rCX; + rX86_FARG2 = rDX; + rX86_FARG3 = rBX; + // TODO(64): Initialize with invalid reg +// rX86_ARG4 = RegStorage::InvalidReg(); +// rX86_ARG5 = RegStorage::InvalidReg(); } - rs_rX86_FARG0 = rs_rAX; - rs_rX86_FARG1 = rs_rCX; - rs_rX86_FARG2 = rs_rDX; - rs_rX86_FARG3 = rs_rBX; rs_rX86_RET0 = rs_rAX; rs_rX86_RET1 = rs_rDX; rs_rX86_INVOKE_TGT = rs_rAX; rs_rX86_COUNT = rs_rCX; - rX86_FARG0 = rAX; - rX86_FARG1 = rCX; - rX86_FARG2 = rDX; - rX86_FARG3 = rBX; rX86_RET0 = rAX; rX86_RET1 = rDX; rX86_INVOKE_TGT = rAX; @@ -1356,7 +1433,11 @@ void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) { // Address the start of the method. RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } // Load the proper value from the literal area. // We don't know the proper offset for the value, so pick one that will force @@ -1676,4 +1757,458 @@ LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) { return new_value; } +// ------------ ABI support: mapping of args to physical registers ------------- +RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide) { + const RegStorage coreArgMappingToPhysicalReg[] = {rs_rX86_ARG1, rs_rX86_ARG2, rs_rX86_ARG3, rs_rX86_ARG4, rs_rX86_ARG5}; + const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage); + const RegStorage fpArgMappingToPhysicalReg[] = {rs_rX86_FARG0, rs_rX86_FARG1, rs_rX86_FARG2, rs_rX86_FARG3, + rs_rX86_FARG4, rs_rX86_FARG5, rs_rX86_FARG6, rs_rX86_FARG7}; + const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage); + + RegStorage result = RegStorage::InvalidReg(); + if (is_double_or_float) { + if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { + result = fpArgMappingToPhysicalReg[cur_fp_reg_++]; + if (result.Valid()) { + result = is_wide ? RegStorage::FloatSolo64(result.GetReg()) : RegStorage::FloatSolo32(result.GetReg()); + } + } + } else { + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = coreArgMappingToPhysicalReg[cur_core_reg_++]; + if (result.Valid()) { + result = is_wide ? RegStorage::Solo64(result.GetReg()) : RegStorage::Solo32(result.GetReg()); + } + } + } + return result; +} + +RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) { + DCHECK(IsInitialized()); + auto res = mapping_.find(in_position); + return res != mapping_.end() ? 
res->second : RegStorage::InvalidReg(); +} + +void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper) { + DCHECK(mapper != nullptr); + max_mapped_in_ = -1; + is_there_stack_mapped_ = false; + for (int in_position = 0; in_position < count; in_position++) { + RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide); + if (reg.Valid()) { + mapping_[in_position] = reg; + max_mapped_in_ = std::max(max_mapped_in_, in_position); + if (reg.Is64BitSolo()) { + // We covered 2 args, so skip the next one + in_position++; + } + } else { + is_there_stack_mapped_ = true; + } + } + initialized_ = true; +} + +RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { + if (!Gen64Bit()) { + return GetCoreArgMappingToPhysicalReg(arg_num); + } + + if (!in_to_reg_storage_mapping_.IsInitialized()) { + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; + + InToRegStorageX86_64Mapper mapper; + in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper); + } + return in_to_reg_storage_mapping_.Get(arg_num); +} + +RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) { + // For the 32-bit internal ABI, the first 3 arguments are passed in registers. + // Not used for 64-bit, TODO: Move X86_32 to the same framework + switch (core_arg_num) { + case 0: + return rs_rX86_ARG1; + case 1: + return rs_rX86_ARG2; + case 2: + return rs_rX86_ARG3; + default: + return RegStorage::InvalidReg(); + } +} + +// ---------End of ABI support: mapping of args to physical registers ------------- + +/* + * If there are any ins passed in registers that have not been promoted + * to a callee-save register, flush them to the frame. Perform initial + * assignment of promoted arguments. + * + * ArgLocs is an array of location records describing the incoming arguments + * with one location record per word of argument. + */ +void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { + if (!Gen64Bit()) return Mir2Lir::FlushIns(ArgLocs, rl_method); + /* + * Dummy up a RegLocation for the incoming Method* + * It will attempt to keep kArg0 live (or copy it to home location + * if promoted). + */ + + RegLocation rl_src = rl_method; + rl_src.location = kLocPhysReg; + rl_src.reg = TargetReg(kArg0); + rl_src.home = false; + MarkLive(rl_src); + StoreValue(rl_method, rl_src); + // If Method* has been promoted, explicitly flush + if (rl_method.location == kLocPhysReg) { + StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + } + + if (cu_->num_ins == 0) { + return; + } + + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + /* + * Copy incoming arguments to their proper home locations. + * NOTE: an older version of dx had an issue in which + * it would reuse static method argument registers. + * This could result in the same Dalvik virtual register + * being promoted to both core and fp regs. To account for this, + * we only copy to the corresponding promoted physical register + * if it matches the type of the SSA name for the incoming + * argument. It is also possible that long and double arguments + * end up half-promoted. In those cases, we must flush the promoted + * half to memory as well. 
+ */ + for (int i = 0; i < cu_->num_ins; i++) { + PromotionMap* v_map = &promotion_map_[start_vreg + i]; + RegStorage reg = RegStorage::InvalidReg(); + // get reg corresponding to input + reg = GetArgMappingToPhysicalReg(i); + + if (reg.Valid()) { + // If arriving in register + bool need_flush = true; + RegLocation* t_loc = &ArgLocs[i]; + if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { + OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); + need_flush = false; + } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { + OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg); + need_flush = false; + } else { + need_flush = true; + } + + // For wide args, force flush if not fully promoted + if (t_loc->wide) { + PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1); + // Is only half promoted? + need_flush |= (p_map->core_location != v_map->core_location) || + (p_map->fp_location != v_map->fp_location); + } + if (need_flush) { + if (t_loc->wide && t_loc->fp) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64); + // Increment i to skip the next one + i++; + } else if (t_loc->wide && !t_loc->fp) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64); + // Increment i to skip the next one + i++; + } else { + Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg); + } + } + } else { + // If arriving in frame & promoted + if (v_map->core_location == kLocPhysReg) { + Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg)); + } + if (v_map->fp_location == kLocPhysReg) { + Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg)); + } + } + } +} + +/* + * Load up to 5 arguments, the first three of which will be in + * kArg1 .. kArg3. On entry kArg0 contains the current method pointer, + * and as part of the load sequence, it must be replaced with + * the target method pointer. Note, this may also be called + * for "range" variants if the number of arguments is 5 or fewer. + */ +int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, + int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type, bool skip_this) { + if (!Gen64Bit()) { + return Mir2Lir::GenDalvikArgsNoRange(info, + call_state, pcrLabel, next_call_insn, + target_method, + vtable_idx, direct_code, + direct_method, type, skip_this); + } + return GenDalvikArgsRange(info, + call_state, pcrLabel, next_call_insn, + target_method, + vtable_idx, direct_code, + direct_method, type, skip_this); +} + +/* + * May have 0+ arguments (also used for jumbo). Note that + * source virtual registers may be in physical registers, so may + * need to be flushed to home location before copying. This + * applies to arg3 and above (see below). 
+ * + * Two general strategies: + * If < 20 arguments + * Pass args 3-18 using vldm/vstm block copy + * Pass arg0, arg1 & arg2 in kArg1-kArg3 + * If 20+ arguments + * Pass args arg19+ using memcpy block copy + * Pass arg0, arg1 & arg2 in kArg1-kArg3 + * + */ +int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, + LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, + InvokeType type, bool skip_this) { + if (!Gen64Bit()) { + return Mir2Lir::GenDalvikArgsRange(info, call_state, + pcrLabel, next_call_insn, + target_method, + vtable_idx, direct_code, direct_method, + type, skip_this); + } + + /* If no arguments, just return */ + if (info->num_arg_words == 0) + return call_state; + + const int start_index = skip_this ? 1 : 0; + + InToRegStorageX86_64Mapper mapper; + InToRegStorageMapping in_to_reg_storage_mapping; + in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); + const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); + const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 : + in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1; + int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped); + + // First of all, check whether it makes sense to use bulk copying + // Optimization is applicable only for the range case + // TODO: make a constant instead of 2 + if (info->is_range && regs_left_to_pass_via_stack >= 2) { + // Scan the rest of the args - if in phys_reg flush to memory + for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) { + RegLocation loc = info->args[next_arg]; + if (loc.wide) { + loc = UpdateLocWide(loc); + if (loc.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); + } + next_arg += 2; + } else { + loc = UpdateLoc(loc); + if (loc.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32); + } + next_arg++; + } + } + + // Logic below assumes that Method pointer is at offset zero from SP. + DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0); + + // The rest can be copied together + int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low); + int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, cu_->instruction_set); + + int current_src_offset = start_offset; + int current_dest_offset = outs_offset; + + while (regs_left_to_pass_via_stack > 0) { + // This is based on the knowledge that the stack itself is 16-byte aligned. + bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; + bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0; + size_t bytes_to_move; + + /* + * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a + * 128-bit move because we won't get the chance to try to align. If there are more than + * 4 registers left to move, consider doing a 128-bit move only if either src or dest is aligned. + * We do this because we could potentially do a smaller move to align. + */ + if (regs_left_to_pass_via_stack == 4 || + (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) { + // Moving 128-bits via xmm register. + bytes_to_move = sizeof(uint32_t) * 4; + + // Allocate a free xmm temp. 
Since we are working through the calling sequence, + // we expect to have an xmm temporary available. AllocTempDouble will abort if + // there are no free registers. + RegStorage temp = AllocTempDouble(); + + LIR* ld1 = nullptr; + LIR* ld2 = nullptr; + LIR* st1 = nullptr; + LIR* st2 = nullptr; + + /* + * The logic is similar for both loads and stores. If we have 16-byte alignment, + * do an aligned move. If we have 8-byte alignment, then do the move in two + * parts. This approach prevents possible cache line splits. Finally, fall back + * to doing an unaligned move. In most cases we likely won't split the cache + * line but we cannot prove it and thus take a conservative approach. + */ + bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; + bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; + + if (src_is_16b_aligned) { + ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP); + } else if (src_is_8b_aligned) { + ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP); + ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1), + kMovHi128FP); + } else { + ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP); + } + + if (dest_is_16b_aligned) { + st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP); + } else if (dest_is_8b_aligned) { + st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP); + st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1), + temp, kMovHi128FP); + } else { + st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP); + } + + // TODO If we could keep track of aliasing information for memory accesses that are wider + // than 64-bit, we wouldn't need to set up a barrier. + if (ld1 != nullptr) { + if (ld2 != nullptr) { + // For 64-bit load we can actually set up the aliasing information. + AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true); + AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true); + } else { + // Set barrier for 128-bit load. + SetMemRefType(ld1, true /* is_load */, kDalvikReg); + ld1->u.m.def_mask = ENCODE_ALL; + } + } + if (st1 != nullptr) { + if (st2 != nullptr) { + // For 64-bit store we can actually set up the aliasing information. + AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true); + AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true); + } else { + // Set barrier for 128-bit store. + SetMemRefType(st1, false /* is_load */, kDalvikReg); + st1->u.m.def_mask = ENCODE_ALL; + } + } + + // Free the temporary used for the data movement. + FreeTemp(temp); + } else { + // Moving 32-bits via general purpose register. + bytes_to_move = sizeof(uint32_t); + + // Instead of allocating a new temp, simply reuse one of the registers being used + // for argument passing. + RegStorage temp = TargetReg(kArg3); + + // Now load the argument VR and store to the outs. 
+ Load32Disp(TargetReg(kSp), current_src_offset, temp); + Store32Disp(TargetReg(kSp), current_dest_offset, temp); + } + + current_src_offset += bytes_to_move; + current_dest_offset += bytes_to_move; + regs_left_to_pass_via_stack -= (bytes_to_move >> 2); + } + DCHECK_EQ(regs_left_to_pass_via_stack, 0); + } + + // Now handle any arguments that were not mapped to registers + if (in_to_reg_storage_mapping.IsThereStackMapped()) { + RegStorage regSingle = TargetReg(kArg2); + RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg()); + for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (!reg.Valid()) { + int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); + + if (rl_arg.wide) { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64); + } else { + LoadValueDirectWideFixed(rl_arg, regWide); + StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64); + } + i++; + } else { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32); + } else { + LoadValueDirectFixed(rl_arg, regSingle); + StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32); + } + } + call_state = next_call_insn(cu_, info, call_state, target_method, + vtable_idx, direct_code, direct_method, type); + } + } + } + + // Finish with mapped registers + for (int i = start_index; i <= last_mapped_in; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (reg.Valid()) { + if (rl_arg.wide) { + LoadValueDirectWideFixed(rl_arg, reg); + i++; + } else { + LoadValueDirectFixed(rl_arg, reg); + } + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + } + + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + if (pcrLabel) { + if (Runtime::Current()->ExplicitNullChecks()) { + *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags); + } else { + *pcrLabel = nullptr; + // In lieu of generating a check for kArg1 being null, we need to + // perform a load when doing implicit checks. + RegStorage tmp = AllocTemp(); + Load32Disp(TargetReg(kArg1), 0, tmp); + MarkPossibleNullPointerException(info->opt_flags); + FreeTemp(tmp); + } + } + return call_state; +} + } // namespace art + diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 618b3a5987..d074d8104d 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -89,11 +89,8 @@ LIR* X86Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { res = NewLIR2(kX86Xor32RR, r_dest.GetReg(), r_dest.GetReg()); } else { // Note, there is no byte immediate form of a 32 bit immediate move. 
- if (r_dest.Is64Bit()) { - res = NewLIR2(kX86Mov64RI, r_dest.GetReg(), value); - } else { - res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); - } + // 64-bit immediate is not supported by LIR structure + res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); } if (r_dest_save.IsFloat()) { @@ -120,8 +117,8 @@ LIR* X86Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) { LIR* X86Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { X86OpCode opcode = kX86Bkpt; switch (op) { - case kOpNeg: opcode = kX86Neg32R; break; - case kOpNot: opcode = kX86Not32R; break; + case kOpNeg: opcode = r_dest_src.Is64Bit() ? kX86Neg64R : kX86Neg32R; break; + case kOpNot: opcode = r_dest_src.Is64Bit() ? kX86Not64R : kX86Not32R; break; case kOpRev: opcode = kX86Bswap32R; break; case kOpBlx: opcode = kX86CallR; break; default: @@ -138,6 +135,9 @@ LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { switch (op) { case kOpAdd: opcode = byte_imm ? kX86Add64RI8 : kX86Add64RI; break; case kOpSub: opcode = byte_imm ? kX86Sub64RI8 : kX86Sub64RI; break; + case kOpLsl: opcode = kX86Sal64RI; break; + case kOpLsr: opcode = kX86Shr64RI; break; + case kOpAsr: opcode = kX86Sar64RI; break; default: LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op; } @@ -189,6 +189,7 @@ LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { } LIR* X86Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) { + bool is64Bit = r_dest_src1.Is64Bit(); X86OpCode opcode = kX86Nop; bool src2_must_be_cx = false; switch (op) { @@ -207,33 +208,34 @@ LIR* X86Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) OpReg(kOpRev, r_dest_src1); return OpRegImm(kOpAsr, r_dest_src1, 16); // X86 binary opcodes - case kOpSub: opcode = kX86Sub32RR; break; - case kOpSbc: opcode = kX86Sbb32RR; break; - case kOpLsl: opcode = kX86Sal32RC; src2_must_be_cx = true; break; - case kOpLsr: opcode = kX86Shr32RC; src2_must_be_cx = true; break; - case kOpAsr: opcode = kX86Sar32RC; src2_must_be_cx = true; break; - case kOpMov: opcode = kX86Mov32RR; break; - case kOpCmp: opcode = kX86Cmp32RR; break; - case kOpAdd: opcode = kX86Add32RR; break; - case kOpAdc: opcode = kX86Adc32RR; break; - case kOpAnd: opcode = kX86And32RR; break; - case kOpOr: opcode = kX86Or32RR; break; - case kOpXor: opcode = kX86Xor32RR; break; + case kOpSub: opcode = is64Bit ? kX86Sub64RR : kX86Sub32RR; break; + case kOpSbc: opcode = is64Bit ? kX86Sbb64RR : kX86Sbb32RR; break; + case kOpLsl: opcode = is64Bit ? kX86Sal64RC : kX86Sal32RC; src2_must_be_cx = true; break; + case kOpLsr: opcode = is64Bit ? kX86Shr64RC : kX86Shr32RC; src2_must_be_cx = true; break; + case kOpAsr: opcode = is64Bit ? kX86Sar64RC : kX86Sar32RC; src2_must_be_cx = true; break; + case kOpMov: opcode = is64Bit ? kX86Mov64RR : kX86Mov32RR; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64RR : kX86Cmp32RR; break; + case kOpAdd: opcode = is64Bit ? kX86Add64RR : kX86Add32RR; break; + case kOpAdc: opcode = is64Bit ? kX86Adc64RR : kX86Adc32RR; break; + case kOpAnd: opcode = is64Bit ? kX86And64RR : kX86And32RR; break; + case kOpOr: opcode = is64Bit ? kX86Or64RR : kX86Or32RR; break; + case kOpXor: opcode = is64Bit ? kX86Xor64RR : kX86Xor32RR; break; case kOp2Byte: // TODO: there are several instances of this check. A utility function perhaps? // TODO: Similar to Arm's reg < 8 check. Perhaps add attribute checks to RegStorage? // Use shifts instead of a byte operand if the source can't be byte accessed. 
if (r_src2.GetRegNum() >= rs_rX86_SP.GetRegNum()) { - NewLIR2(kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg()); - NewLIR2(kX86Sal32RI, r_dest_src1.GetReg(), 24); - return NewLIR2(kX86Sar32RI, r_dest_src1.GetReg(), 24); + NewLIR2(is64Bit ? kX86Mov64RR : kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg()); + NewLIR2(is64Bit ? kX86Sal64RI : kX86Sal32RI, r_dest_src1.GetReg(), is64Bit ? 56 : 24); + return NewLIR2(is64Bit ? kX86Sar64RI : kX86Sar32RI, r_dest_src1.GetReg(), + is64Bit ? 56 : 24); } else { - opcode = kX86Movsx8RR; + opcode = is64Bit ? kX86Bkpt : kX86Movsx8RR; } break; - case kOp2Short: opcode = kX86Movsx16RR; break; - case kOp2Char: opcode = kX86Movzx16RR; break; - case kOpMul: opcode = kX86Imul32RR; break; + case kOp2Short: opcode = is64Bit ? kX86Bkpt : kX86Movsx16RR; break; + case kOp2Char: opcode = is64Bit ? kX86Bkpt : kX86Movzx16RR; break; + case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RR; break; default: LOG(FATAL) << "Bad case in OpRegReg " << op; break; @@ -354,16 +356,17 @@ LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, Re } LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) { + bool is64Bit = r_dest.Is64Bit(); X86OpCode opcode = kX86Nop; switch (op) { // X86 binary opcodes - case kOpSub: opcode = kX86Sub32RM; break; - case kOpMov: opcode = kX86Mov32RM; break; - case kOpCmp: opcode = kX86Cmp32RM; break; - case kOpAdd: opcode = kX86Add32RM; break; - case kOpAnd: opcode = kX86And32RM; break; - case kOpOr: opcode = kX86Or32RM; break; - case kOpXor: opcode = kX86Xor32RM; break; + case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break; + case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break; + case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break; + case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break; + case kOpOr: opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break; + case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break; case kOp2Byte: opcode = kX86Movsx8RM; break; case kOp2Short: opcode = kX86Movsx16RM; break; case kOp2Char: opcode = kX86Movzx16RM; break; @@ -382,63 +385,68 @@ LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int o LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) { DCHECK_NE(rl_dest.location, kLocPhysReg); int displacement = SRegOffset(rl_dest.s_reg_low); + bool is64Bit = rl_dest.wide != 0; X86OpCode opcode = kX86Nop; switch (op) { - case kOpSub: opcode = kX86Sub32MR; break; - case kOpMov: opcode = kX86Mov32MR; break; - case kOpCmp: opcode = kX86Cmp32MR; break; - case kOpAdd: opcode = kX86Add32MR; break; - case kOpAnd: opcode = kX86And32MR; break; - case kOpOr: opcode = kX86Or32MR; break; - case kOpXor: opcode = kX86Xor32MR; break; - case kOpLsl: opcode = kX86Sal32MC; break; - case kOpLsr: opcode = kX86Shr32MC; break; - case kOpAsr: opcode = kX86Sar32MC; break; + case kOpSub: opcode = is64Bit ? kX86Sub64MR : kX86Sub32MR; break; + case kOpMov: opcode = is64Bit ? kX86Mov64MR : kX86Mov32MR; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64MR : kX86Cmp32MR; break; + case kOpAdd: opcode = is64Bit ? kX86Add64MR : kX86Add32MR; break; + case kOpAnd: opcode = is64Bit ? kX86And64MR : kX86And32MR; break; + case kOpOr: opcode = is64Bit ? kX86Or64MR : kX86Or32MR; break; + case kOpXor: opcode = is64Bit ? kX86Xor64MR : kX86Xor32MR; break; + case kOpLsl: opcode = is64Bit ? 
kX86Sal64MC : kX86Sal32MC; break; + case kOpLsr: opcode = is64Bit ? kX86Shr64MC : kX86Shr32MC; break; + case kOpAsr: opcode = is64Bit ? kX86Sar64MC : kX86Sar32MC; break; default: LOG(FATAL) << "Bad case in OpMemReg " << op; break; } LIR *l = NewLIR3(opcode, rs_rX86_SP.GetReg(), displacement, r_value); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */); - AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, false /* is_64bit */); + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); + AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */); return l; } LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegLocation rl_value) { DCHECK_NE(rl_value.location, kLocPhysReg); + bool is64Bit = r_dest.Is64Bit(); int displacement = SRegOffset(rl_value.s_reg_low); X86OpCode opcode = kX86Nop; switch (op) { - case kOpSub: opcode = kX86Sub32RM; break; - case kOpMov: opcode = kX86Mov32RM; break; - case kOpCmp: opcode = kX86Cmp32RM; break; - case kOpAdd: opcode = kX86Add32RM; break; - case kOpAnd: opcode = kX86And32RM; break; - case kOpOr: opcode = kX86Or32RM; break; - case kOpXor: opcode = kX86Xor32RM; break; - case kOpMul: opcode = kX86Imul32RM; break; + case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break; + case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break; + case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break; + case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break; + case kOpOr: opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break; + case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break; + case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RM; break; default: LOG(FATAL) << "Bad case in OpRegMem " << op; break; } LIR *l = NewLIR3(opcode, r_dest.GetReg(), rs_rX86_SP.GetReg(), displacement); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */); + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); return l; } LIR* X86Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) { + bool is64Bit = r_dest.Is64Bit(); if (r_dest != r_src1 && r_dest != r_src2) { if (op == kOpAdd) { // lea special case, except can't encode rbp as base if (r_src1 == r_src2) { OpRegCopy(r_dest, r_src1); return OpRegImm(kOpLsl, r_dest, 1); } else if (r_src1 != rs_rBP) { - return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src1.GetReg() /* base */, - r_src2.GetReg() /* index */, 0 /* scale */, 0 /* disp */); + return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + r_src1.GetReg() /* base */, r_src2.GetReg() /* index */, + 0 /* scale */, 0 /* disp */); } else { - return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src2.GetReg() /* base */, - r_src1.GetReg() /* index */, 0 /* scale */, 0 /* disp */); + return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + r_src2.GetReg() /* base */, r_src1.GetReg() /* index */, + 0 /* scale */, 0 /* disp */); } } else { OpRegCopy(r_dest, r_src1); @@ -476,10 +484,10 @@ LIR* X86Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, } LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int value) { - if (op == kOpMul) { + if (op == kOpMul && !Gen64Bit()) { X86OpCode opcode = IS_SIMM8(value) ? 
kX86Imul32RRI8 : kX86Imul32RRI; return NewLIR3(opcode, r_dest.GetReg(), r_src.GetReg(), value); - } else if (op == kOpAnd) { + } else if (op == kOpAnd && !Gen64Bit()) { if (value == 0xFF && r_src.Low4()) { return NewLIR2(kX86Movzx8RR, r_dest.GetReg(), r_src.GetReg()); } else if (value == 0xFFFF) { @@ -492,8 +500,9 @@ LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r5sib_no_base /* base */, r_src.GetReg() /* index */, value /* scale */, 0 /* disp */); } else if (op == kOpAdd) { // lea add special case - return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src.GetReg() /* base */, - rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, 0 /* scale */, value /* disp */); + return NewLIR5(Gen64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + r_src.GetReg() /* base */, rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, + 0 /* scale */, value /* disp */); } OpRegCopy(r_dest, r_src); } @@ -556,7 +565,11 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { // Address the start of the method RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } // Load the proper value from the literal area. // We don't know the proper offset for the value, so pick one that will force @@ -582,8 +595,20 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { } } } else { - res = LoadConstantNoClobber(r_dest.GetLow(), val_lo); - LoadConstantNoClobber(r_dest.GetHigh(), val_hi); + if (r_dest.IsPair()) { + res = LoadConstantNoClobber(r_dest.GetLow(), val_lo); + LoadConstantNoClobber(r_dest.GetHigh(), val_hi); + } else { + // TODO(64) make int64_t value parameter of LoadConstantNoClobber + if (val_lo < 0) { + val_hi += 1; + } + res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi); + NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32); + if (val_lo != 0) { + NewLIR2(kX86Add64RI, r_dest.GetReg(), val_lo); + } + } } return res; } @@ -601,6 +626,8 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int case kDouble: if (r_dest.IsFloat()) { opcode = is_array ? kX86MovsdRA : kX86MovsdRM; + } else if (!pair) { + opcode = is_array ? kX86Mov64RA : kX86Mov64RM; } else { opcode = is_array ? kX86Mov32RA : kX86Mov32RM; } @@ -742,13 +769,10 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int case kDouble: if (r_src.IsFloat()) { opcode = is_array ? kX86MovsdAR : kX86MovsdMR; + } else if (!pair) { + opcode = is_array ? kX86Mov64AR : kX86Mov64MR; } else { - if (Gen64Bit()) { - opcode = is_array ? kX86Mov64AR : kX86Mov64MR; - } else { - // TODO(64): pair = true; - opcode = is_array ? kX86Mov32AR : kX86Mov32MR; - } + opcode = is_array ? kX86Mov32AR : kX86Mov32MR; } // TODO: double store is to unaligned address DCHECK_EQ((displacement & 0x3), 0); @@ -855,7 +879,7 @@ void X86Mir2Lir::AnalyzeMIR() { // Did we need a pointer to the method code? 
if (store_method_addr_) { - base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, false); + base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, Gen64Bit() == true); } else { base_of_code_ = nullptr; } @@ -971,6 +995,7 @@ RegLocation X86Mir2Lir::UpdateLocTyped(RegLocation loc, int reg_class) { loc.location = kLocDalvikFrame; } } + DCHECK(CheckCorePoolSanity()); return loc; } @@ -984,7 +1009,7 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc, int reg_class) { loc.location = kLocDalvikFrame; } } + DCHECK(CheckCorePoolSanity()); return loc; } - } // namespace art diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index bb8df893f8..e550488a03 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -334,10 +334,18 @@ extern X86NativeRegisterPool rX86_ARG0; extern X86NativeRegisterPool rX86_ARG1; extern X86NativeRegisterPool rX86_ARG2; extern X86NativeRegisterPool rX86_ARG3; +#ifdef TARGET_REX_SUPPORT +extern X86NativeRegisterPool rX86_ARG4; +extern X86NativeRegisterPool rX86_ARG5; +#endif extern X86NativeRegisterPool rX86_FARG0; extern X86NativeRegisterPool rX86_FARG1; extern X86NativeRegisterPool rX86_FARG2; extern X86NativeRegisterPool rX86_FARG3; +extern X86NativeRegisterPool rX86_FARG4; +extern X86NativeRegisterPool rX86_FARG5; +extern X86NativeRegisterPool rX86_FARG6; +extern X86NativeRegisterPool rX86_FARG7; extern X86NativeRegisterPool rX86_RET0; extern X86NativeRegisterPool rX86_RET1; extern X86NativeRegisterPool rX86_INVOKE_TGT; @@ -347,10 +355,16 @@ extern RegStorage rs_rX86_ARG0; extern RegStorage rs_rX86_ARG1; extern RegStorage rs_rX86_ARG2; extern RegStorage rs_rX86_ARG3; +extern RegStorage rs_rX86_ARG4; +extern RegStorage rs_rX86_ARG5; extern RegStorage rs_rX86_FARG0; extern RegStorage rs_rX86_FARG1; extern RegStorage rs_rX86_FARG2; extern RegStorage rs_rX86_FARG3; +extern RegStorage rs_rX86_FARG4; +extern RegStorage rs_rX86_FARG5; +extern RegStorage rs_rX86_FARG6; +extern RegStorage rs_rX86_FARG7; extern RegStorage rs_rX86_RET0; extern RegStorage rs_rX86_RET1; extern RegStorage rs_rX86_INVOKE_TGT; @@ -363,6 +377,9 @@ const RegLocation x86_loc_c_return const RegLocation x86_loc_c_return_wide {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG}; +const RegLocation x86_64_loc_c_return_wide + {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, + RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG}; const RegLocation x86_loc_c_return_float {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG}; @@ -505,6 +522,7 @@ enum X86OpCode { UnaryOpcode(kX86Divmod, DaR, DaM, DaA), UnaryOpcode(kX86Idivmod, DaR, DaM, DaA), kx86Cdq32Da, + kx86Cqo64Da, kX86Bswap32R, kX86Push32R, kX86Pop32R, #undef UnaryOpcode @@ -518,8 +536,12 @@ enum X86OpCode { kX86MovssAR, Binary0fOpCode(kX86Cvtsi2sd), // int to double Binary0fOpCode(kX86Cvtsi2ss), // int to float + Binary0fOpCode(kX86Cvtsqi2sd), // long to double + Binary0fOpCode(kX86Cvtsqi2ss), // long to float Binary0fOpCode(kX86Cvttsd2si), // truncating double to int Binary0fOpCode(kX86Cvttss2si), // truncating float to int + Binary0fOpCode(kX86Cvttsd2sqi), // truncating double to long + Binary0fOpCode(kX86Cvttss2sqi), // truncating float to long Binary0fOpCode(kX86Cvtsd2si), // rounding double to int Binary0fOpCode(kX86Cvtss2si), // rounding float to int Binary0fOpCode(kX86Ucomisd), // unordered double compare @@ -587,11 +609,15 @@ enum X86OpCode { 
kX86MovhpsRM, kX86MovhpsRA, // load packed single FP values from m64 to high quadword of xmm kX86MovhpsMR, kX86MovhpsAR, // store packed single FP values from high quadword of xmm to m64 Binary0fOpCode(kX86Movdxr), // move into xmm from gpr + Binary0fOpCode(kX86Movqxr), // move into xmm from 64 bit gpr + kX86MovqrxRR, kX86MovqrxMR, kX86MovqrxAR, // move into 64 bit reg from xmm kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm + kX86MovsxdRR, kX86MovsxdRM, kX86MovsxdRA, // move 32 bit to 64 bit with sign extension kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand kX86Mfence, // memory barrier Binary0fOpCode(kX86Imul16), // 16bit multiply Binary0fOpCode(kX86Imul32), // 32bit multiply + Binary0fOpCode(kX86Imul64), // 64bit multiply kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR, // compare and exchange kX86LockCmpxchgMR, kX86LockCmpxchgAR, // locked compare and exchange kX86LockCmpxchg8bM, kX86LockCmpxchg8bA, // locked compare and exchange
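
The constant-shift lowering in GenShiftImmOpLong (int_x86.cc above) splits a long shift over 32-bit register pairs into three cases: a shift of exactly 32 just moves the low word into the high word, a shift above 31 additionally shifts the moved word by (amount - 32), and smaller shifts use an SHLD/SAL (or SHRD/SAR, SHRD/SHR) pair. Below is a minimal, stand-alone C++ sketch of that case split for the left-shift flavour, checked against a native 64-bit shift; ShlLongViaPair is a made-up name for illustration, not ART code.

#include <cassert>
#include <cstdint>

// Recombine a 64-bit left shift from 32-bit halves, mirroring the three cases
// (shift == 32, shift > 31, shift < 32) handled by the generated SHLD/SAL sequences.
static uint64_t ShlLongViaPair(uint32_t lo, uint32_t hi, unsigned shift) {
  uint32_t r_lo, r_hi;
  if (shift == 32) {            // high word takes the low word, low word becomes zero
    r_hi = lo;
    r_lo = 0;
  } else if (shift > 31) {      // as above, plus a shift of the moved word by (shift - 32)
    r_hi = lo << (shift - 32);
    r_lo = 0;
  } else {                      // SHLD-style: high word receives the bits shifted out of the low word
    r_hi = (hi << shift) | (shift == 0 ? 0u : lo >> (32 - shift));
    r_lo = lo << shift;
  }
  return (static_cast<uint64_t>(r_hi) << 32) | r_lo;
}

int main() {
  uint64_t v = 0x0000000180000001ULL;
  for (unsigned s = 0; s < 64; ++s) {
    assert(ShlLongViaPair(static_cast<uint32_t>(v), static_cast<uint32_t>(v >> 32), s) == (v << s));
  }
  return 0;
}

The new argument-mapping code in target_x86.cc (InToRegStorageX86_64Mapper and InToRegStorageMapping) walks the incoming arguments in order, hands out up to five core argument registers (ARG1..ARG5) and eight XMM registers (FARG0..FARG7), skips the second in-position of a wide argument once it has received a 64-bit solo register, and records whether anything is left to pass on the stack. The sketch below models only that allocation policy, under simplified stand-in names (ArgLoc, X86_64Mapper, string register labels); it is not the actual RegStorage/RegLocation API.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Simplified stand-in for RegLocation: only the bits the mapping policy looks at.
struct ArgLoc { bool fp; bool wide; };

// Hands out core argument registers (ARG1..ARG5) and XMM registers (XMM0..XMM7) in order;
// an exhausted pool means the argument is passed on the stack.
class X86_64Mapper {
 public:
  std::string GetNextReg(bool is_fp, bool is_wide) {
    static const char* kCore[] = {"ARG1", "ARG2", "ARG3", "ARG4", "ARG5"};
    static const char* kFp[] = {"XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7"};
    if (is_fp) {
      if (cur_fp_ < 8) return std::string(kFp[cur_fp_++]) + (is_wide ? "(64)" : "(32)");
    } else {
      if (cur_core_ < 5) return std::string(kCore[cur_core_++]) + (is_wide ? "(64)" : "(32)");
    }
    return "";  // no register left for this argument
  }
 private:
  int cur_core_ = 0;
  int cur_fp_ = 0;
};

int main() {
  // Ins for a static (J, F, I) signature: long low word, long high word, float, int.
  std::vector<ArgLoc> ins = {{false, true}, {false, true}, {true, false}, {false, false}};
  X86_64Mapper mapper;
  std::map<int, std::string> mapping;
  bool stack_needed = false;
  for (size_t i = 0; i < ins.size(); ++i) {
    std::string reg = mapper.GetNextReg(ins[i].fp, ins[i].wide);
    if (reg.empty()) {
      stack_needed = true;  // this in-position must be flushed to the outs area instead
      continue;
    }
    mapping[static_cast<int>(i)] = reg;
    if (ins[i].wide) {
      ++i;                  // a wide argument covers two in-positions; skip the high word
    }
  }
  for (const auto& kv : mapping) {
    std::printf("in[%d] -> %s\n", kv.first, kv.second.c_str());
  }
  std::printf("stack needed: %s\n", stack_needed ? "yes" : "no");
  return 0;
}

With these stand-in names the example prints in[0] -> ARG1(64), in[2] -> XMM0(32), in[3] -> ARG2(32) and no stack spill, which corresponds to the split that GenDalvikArgsRange above uses to decide between its mapped-register pass and its stack-store pass.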