Diffstat (limited to 'compiler')
40 files changed, 1443 insertions, 294 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4f9f31251f..6d2f5d1ab1 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -84,6 +84,7 @@ LIBART_COMPILER_SRC_FILES := \
 	optimizing/code_generator.cc \
 	optimizing/code_generator_arm.cc \
 	optimizing/code_generator_x86.cc \
+	optimizing/code_generator_x86_64.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index aa50b458c4..84e1a9451b 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -755,7 +755,7 @@ static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
     support_list_size = arraysize(x86_64_support_list);
   }

-  for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
+  for (unsigned int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
     BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx);
     if (bb == NULL) continue;
     if (bb->block_type == kDead) continue;
@@ -884,15 +884,13 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver,
                        (1 << kBBOpt) |
                        (1 << kMatch) |
                        (1 << kPromoteCompilerTemps));
-  }
-
-  if (cu.instruction_set == kArm64 || cu.instruction_set == kX86_64) {
-    // TODO(Arm64): enable optimizations once backend is mature enough.
+  } else if (cu.instruction_set == kX86_64) {
     // TODO(X86_64): enable optimizations once backend is mature enough.
     cu.disable_opt = ~(uint32_t)0;
-    if (cu.instruction_set == kArm64) {
-      cu.enable_debug |= (1 << kDebugCodegenDump);
-    }
+  } else if (cu.instruction_set == kArm64) {
+    // TODO(Arm64): enable optimizations once backend is mature enough.
+    cu.disable_opt = ~(uint32_t)0;
+    cu.enable_debug |= (1 << kDebugCodegenDump);
   }

   cu.StartTimingSplit("BuildMIRGraph");
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index a2676c82ca..63a55707e5 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -586,7 +586,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_
   if (current_method_ == 0) {
     DCHECK(entry_block_ == NULL);
     DCHECK(exit_block_ == NULL);
-    DCHECK_EQ(num_blocks_, 0);
+    DCHECK_EQ(num_blocks_, 0U);
     // Use id 0 to represent a null block.
     BasicBlock* null_block = NewMemBB(kNullBlock, num_blocks_++);
     DCHECK_EQ(null_block->id, NullBasicBlockId);
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index b6cec662c3..27b8ca43aa 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -587,7 +587,7 @@ class MIRGraph {
     return m_units_[m_unit_index]->GetCodeItem()->insns_;
   }

-  int GetNumBlocks() const {
+  unsigned int GetNumBlocks() const {
     return num_blocks_;
   }

@@ -607,7 +607,7 @@ class MIRGraph {
     return exit_block_;
   }

-  BasicBlock* GetBasicBlock(int block_id) const {
+  BasicBlock* GetBasicBlock(unsigned int block_id) const {
     return (block_id == NullBasicBlockId) ? NULL : block_list_.Get(block_id);
   }

@@ -1149,7 +1149,7 @@ class MIRGraph {
   ArenaBitVector* try_block_addr_;
   BasicBlock* entry_block_;
   BasicBlock* exit_block_;
-  int num_blocks_;
+  unsigned int num_blocks_;
   const DexFile::CodeItem* current_code_item_;
   GrowableArray<uint16_t> dex_pc_to_block_map_;  // FindBlock lookup cache.
std::vector<DexCompilationUnit*> m_units_; // List of methods included in this graph diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index a895e6ec34..5083bbcc15 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -1628,7 +1628,7 @@ void ArmMir2Lir::AssembleLIR() { CreateNativeGcMap(); } -int ArmMir2Lir::GetInsnSize(LIR* lir) { +size_t ArmMir2Lir::GetInsnSize(LIR* lir) { DCHECK(!IsPseudoLirOp(lir->opcode)); return EncodingMap[lir->opcode].size; } diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 8db7d4ee73..95bcfbd0fc 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -87,7 +87,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); - int GetInsnSize(LIR* lir); + size_t GetInsnSize(LIR* lir) OVERRIDE; bool IsUnconditionalBranch(LIR* lir); // Check support for volatile load/store of a given size. diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 2c4f26216f..93621471f7 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -887,7 +887,7 @@ void Arm64Mir2Lir::AssembleLIR() { CreateNativeGcMap(); } -int Arm64Mir2Lir::GetInsnSize(LIR* lir) { +size_t Arm64Mir2Lir::GetInsnSize(LIR* lir) { ArmOpcode opcode = UNWIDE(lir->opcode); DCHECK(!IsPseudoLirOp(opcode)); return EncodingMap[opcode].size; diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index bf09b8610e..9a80c69918 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -86,7 +86,7 @@ class Arm64Mir2Lir : public Mir2Lir { std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); - int GetInsnSize(LIR* lir); + size_t GetInsnSize(LIR* lir) OVERRIDE; bool IsUnconditionalBranch(LIR* lir); // Check support for volatile load/store of a given size. diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 8f6d716ecb..f9081cea08 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -456,7 +456,8 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { * this is an uncommon operation and isn't especially performance * critical. */ - RegStorage r_src = AllocTemp(); + // This is addressing the stack, which may be out of the 4G area. + RegStorage r_src = cu_->target64 ? 
AllocTempWide() : AllocTemp(); RegStorage r_dst = AllocTemp(); RegStorage r_idx = AllocTemp(); RegStorage r_val; diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 2af847c7df..a90a06e1ba 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -155,7 +155,12 @@ void Mir2Lir::CallRuntimeHelperRegLocation(ThreadOffset<pointer_size> helper_off if (arg0.wide == 0) { LoadValueDirectFixed(arg0, TargetReg(kArg0)); } else { - RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); + RegStorage r_tmp; + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); + } LoadValueDirectWideFixed(arg0, r_tmp); } ClobberCallerSave(); @@ -181,7 +186,12 @@ void Mir2Lir::CallRuntimeHelperImmRegLocation(ThreadOffset<pointer_size> helper_ if (arg1.wide == 0) { LoadValueDirectFixed(arg1, TargetReg(kArg1)); } else { - RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); + RegStorage r_tmp; + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); + } LoadValueDirectWideFixed(arg1, r_tmp); } LoadConstant(TargetReg(kArg0), arg0); @@ -279,6 +289,12 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1)); } else if (cu_->instruction_set == kArm64) { LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); + } else if (cu_->instruction_set == kX86_64) { + if (arg0.fp) { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg0)); + } else { + LoadValueDirectFixed(arg1, arg1.fp ? 
TargetReg(kFArg0) : TargetReg(kArg1)); + } } else { LoadValueDirectFixed(arg1, TargetReg(kArg1)); } @@ -423,7 +439,12 @@ void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset<pointer_si if (arg2.wide == 0) { LoadValueDirectFixed(arg2, TargetReg(kArg2)); } else { - RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + RegStorage r_tmp; + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg2).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + } LoadValueDirectWideFixed(arg2, r_tmp); } LoadConstant(TargetReg(kArg0), arg0); diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc index b26ab579c3..c7e9190ed9 100644 --- a/compiler/dex/quick/mips/assemble_mips.cc +++ b/compiler/dex/quick/mips/assemble_mips.cc @@ -709,7 +709,7 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(CodeOffset start_addr) { return res; } -int MipsMir2Lir::GetInsnSize(LIR* lir) { +size_t MipsMir2Lir::GetInsnSize(LIR* lir) { DCHECK(!IsPseudoLirOp(lir->opcode)); return EncodingMap[lir->opcode].size; } diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index 62a7f2455c..571adaccc1 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -85,7 +85,7 @@ class MipsMir2Lir FINAL : public Mir2Lir { std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); - int GetInsnSize(LIR* lir); + size_t GetInsnSize(LIR* lir) OVERRIDE; bool IsUnconditionalBranch(LIR* lir); // Check support for volatile load/store of a given size. diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index ca4d0e48bf..9155677c27 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -1162,7 +1162,7 @@ class Mir2Lir : public Backend { virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0; virtual ResourceMask GetPCUseDefEncoding() const = 0; virtual uint64_t GetTargetInstFlags(int opcode) = 0; - virtual int GetInsnSize(LIR* lir) = 0; + virtual size_t GetInsnSize(LIR* lir) = 0; virtual bool IsUnconditionalBranch(LIR* lir) = 0; // Check support for volatile load/store of a given size. 
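The CallRuntimeHelper hunks above switch x86-64 to passing a wide (64-bit) argument in a single 64-bit register (RegStorage::Solo64) instead of the 32-bit register pair (RegStorage::MakeRegPair) used on the other targets. The following standalone sketch uses hypothetical stand-in types, not the ART classes, to illustrate that choice:

    // Sketch only: models the Solo64 vs. MakeRegPair decision for a wide argument.
    #include <iostream>

    enum class Isa { kX86, kX86_64 };

    struct RegDescriptor {
      bool is_pair;  // true: low/high 32-bit pair; false: one 64-bit register
      int low;       // solo register, or low half of the pair
      int high;      // high half of the pair (unused when !is_pair)
    };

    // Pick a location for a 64-bit argument starting at argument register 'base_arg'.
    RegDescriptor WideArgLocation(Isa isa, int base_arg) {
      if (isa == Isa::kX86_64) {
        return {false, base_arg, -1};         // one 64-bit register holds the value
      }
      return {true, base_arg, base_arg + 1};  // 32-bit targets need a register pair
    }

    int main() {
      RegDescriptor r32 = WideArgLocation(Isa::kX86, 0);
      RegDescriptor r64 = WideArgLocation(Isa::kX86_64, 0);
      std::cout << "x86: pair=" << r32.is_pair << " regs=" << r32.low << "," << r32.high << "\n";
      std::cout << "x86_64: pair=" << r64.is_pair << " reg=" << r64.low << "\n";
      return 0;
    }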
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index d37ee67647..c7e289d704 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -506,9 +506,80 @@ static uint8_t LowRegisterBits(int32_t raw_reg) { return low_reg; } +static bool HasModrm(const X86EncodingMap* entry) { + switch (entry->kind) { + case kNullary: return false; + case kRegOpcode: return false; + default: return true; + } +} + +static bool HasSib(const X86EncodingMap* entry) { + switch (entry->kind) { + case kArray: return true; + case kArrayReg: return true; + case kRegArray: return true; + case kArrayImm: return true; + case kRegArrayImm: return true; + case kShiftArrayImm: return true; + case kShiftArrayCl: return true; + case kArrayCond: return true; + case kCall: + switch (entry->opcode) { + case kX86CallA: return true; + default: return false; + } + case kPcRel: return true; + switch (entry->opcode) { + case kX86PcRelLoadRA: return true; + default: return false; + } + default: return false; + } +} + +static bool ModrmIsRegReg(const X86EncodingMap* entry) { + switch (entry->kind) { + // There is no modrm for this kind of instruction, therefore the reg doesn't form part of the + // modrm: + case kNullary: return true; + case kRegOpcode: return true; + case kMovRegImm: return true; + // Regular modrm value of 3 cases, when there is one register the other register holds an + // opcode so the base register is special. + case kReg: return true; + case kRegReg: return true; + case kRegRegStore: return true; + case kRegImm: return true; + case kRegRegImm: return true; + case kRegRegImmStore: return true; + case kShiftRegImm: return true; + case kShiftRegCl: return true; + case kRegCond: return true; + case kRegRegCond: return true; + case kJmp: + switch (entry->opcode) { + case kX86JmpR: return true; + default: return false; + } + case kCall: + switch (entry->opcode) { + case kX86CallR: return true; + default: return false; + } + default: return false; + } +} + size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index, - int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form, - int32_t displacement) { + int32_t raw_base, int32_t displacement) { + bool has_modrm = HasModrm(entry); + bool has_sib = HasSib(entry); + bool r8_form = entry->skeleton.r8_form; + bool modrm_is_reg_reg = ModrmIsRegReg(entry); + if (has_sib) { + DCHECK(!modrm_is_reg_reg); + } size_t size = 0; if (entry->skeleton.prefix1 > 0) { ++size; @@ -517,15 +588,19 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int } } if (Gen64Bit() || kIsDebugBuild) { - bool registers_need_rex_prefix = - NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base) || - (r8_form && RegStorage::RegNum(raw_reg) > 4) || - (r8_reg_reg_form && RegStorage::RegNum(raw_base) > 4); - if (registers_need_rex_prefix && - entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) { - DCHECK(Gen64Bit()) << "Attempt to use " << entry->name << " on a non-byte register " - << RegStorage::RegNum(raw_reg); - ++size; // rex + bool registers_need_rex_prefix = NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base); + if (r8_form) { + // Do we need an empty REX prefix to normalize byte registers? 
+ registers_need_rex_prefix = registers_need_rex_prefix || (RegStorage::RegNum(raw_reg) >= 4); + registers_need_rex_prefix = registers_need_rex_prefix || + (modrm_is_reg_reg && (RegStorage::RegNum(raw_base) >= 4)); + } + if (registers_need_rex_prefix) { + DCHECK(Gen64Bit()) << "Attempt to use a 64-bit only addressable register " + << RegStorage::RegNum(raw_reg) << " with instruction " << entry->name; + if (entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) { + ++size; // rex + } } } ++size; // opcode @@ -535,89 +610,72 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int ++size; } } - ++size; // modrm - if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum() - || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) { - // SP requires a SIB byte. - // GS access also needs a SIB byte for absolute adressing in 64-bit mode. - ++size; + if (has_modrm) { + ++size; // modrm } - if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) { - // BP requires an explicit displacement, even when it's 0. - if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) { - DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name; + if (!modrm_is_reg_reg) { + if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum() + || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) { + // SP requires a SIB byte. + // GS access also needs a SIB byte for absolute adressing in 64-bit mode. + ++size; + } + if (displacement != 0 || LowRegisterBits(raw_base) == rs_rBP.GetRegNum()) { + // BP requires an explicit displacement, even when it's 0. + if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) { + DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name; + } + size += IS_SIMM8(displacement) ? 1 : 4; } - size += IS_SIMM8(displacement) ? 1 : 4; } size += entry->skeleton.immediate_bytes; return size; } -int X86Mir2Lir::GetInsnSize(LIR* lir) { +size_t X86Mir2Lir::GetInsnSize(LIR* lir) { DCHECK(!IsPseudoLirOp(lir->opcode)); const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode]; DCHECK_EQ(entry->opcode, lir->opcode) << entry->name; + switch (entry->kind) { case kData: return 4; // 4 bytes of data. case kNop: return lir->operands[0]; // Length of nop is sole operand. case kNullary: - // Substract 1 for modrm which isn't used. - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0) - 1; + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0); case kRegOpcode: // lir operands - 0: reg - // Substract 1 for modrm which isn't used. - DCHECK_EQ(false, entry->skeleton.r8_form); - // Note: RegOpcode form passes reg as REX_R but encodes it as REX_B. - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false, 0) - 1; + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0); case kReg: // lir operands - 0: reg - // Note: Reg form passes reg as REX_R but encodes it as REX_B. 
- return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0); case kMem: // lir operands - 0: base, 1: disp - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false, - lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false, - lir->operands[3]); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]); case kMemReg: // lir operands - 0: base, 1: disp, 2: reg - return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], - false, entry->skeleton.r8_form, false, lir->operands[1]); + return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]); case kMemRegImm: // lir operands - 0: base, 1: disp, 2: reg 3: immediate - return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], - false, entry->skeleton.r8_form, false, lir->operands[1]); + return ComputeSize(entry, lir->operands[2], NO_REG, lir->operands[0], lir->operands[1]); case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0], - true, entry->skeleton.r8_form, false, lir->operands[3]); + lir->operands[3]); case kThreadReg: // lir operands - 0: disp, 1: reg - DCHECK_EQ(false, entry->skeleton.r8_form); // Thread displacement size is always 32bit. - return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, false, false, false, - 0x12345678); + return ComputeSize(entry, lir->operands[1], NO_REG, NO_REG, 0x12345678); case kRegReg: // lir operands - 0: reg1, 1: reg2 - // Note: RegReg form passes reg2 as index but encodes it using base. - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, - false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0); case kRegRegStore: // lir operands - 0: reg2, 1: reg1 - // Note: RegRegStore form passes reg1 as index but encodes it using base. - return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, - false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); + return ComputeSize(entry, lir->operands[1], NO_REG, lir->operands[0], 0); case kRegMem: // lir operands - 0: reg, 1: base, 2: disp - return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], - false, entry->skeleton.r8_form, false, lir->operands[2]); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]); case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1], - true, entry->skeleton.r8_form, false, lir->operands[4]); + lir->operands[4]); case kRegThread: // lir operands - 0: reg, 1: disp // Thread displacement size is always 32bit. 
- DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, false, false, false, - 0x12345678); + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0x12345678); case kRegImm: { // lir operands - 0: reg, 1: immediate - size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0); + size_t size = ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0); // AX opcodes don't require the modrm byte. if (entry->skeleton.ax_opcode == 0) { return size; @@ -626,83 +684,62 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } } case kMemImm: // lir operands - 0: base, 1: disp, 2: immediate - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], - false, false, false, lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], - true, false, false, lir->operands[3]); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]); case kThreadImm: // lir operands - 0: disp, 1: imm // Thread displacement size is always 32bit. - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678); + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678); case kRegRegImm: // lir operands - 0: reg1, 1: reg2, 2: imm // Note: RegRegImm form passes reg2 as index but encodes it using base. - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, - false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, 0); case kRegRegImmStore: // lir operands - 0: reg2, 1: reg1, 2: imm // Note: RegRegImmStore form passes reg1 as index but encodes it using base. - return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, - false, entry->skeleton.r8_form, entry->skeleton.r8_form, 0); + return ComputeSize(entry, lir->operands[1], lir->operands[0], NO_REG, 0); case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm - return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], - false, entry->skeleton.r8_form, false, lir->operands[2]); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]); case kRegArrayImm: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1], - true, entry->skeleton.r8_form, false, lir->operands[4]); + lir->operands[4]); case kMovRegImm: // lir operands - 0: reg, 1: immediate return ((entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])) ? 1 : 0) + 1 + entry->skeleton.immediate_bytes; case kShiftRegImm: // lir operands - 0: reg, 1: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0) - + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0) - (lir->operands[1] == 1 ? 1 : 0); case kShiftMemImm: // lir operands - 0: base, 1: disp, 2: immediate // Shift by immediate one has a shorter opcode. 
- return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], - false, entry->skeleton.r8_form, false, lir->operands[1]) - + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]) - (lir->operands[2] == 1 ? 1 : 0); case kShiftArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], - true, entry->skeleton.r8_form, false, lir->operands[3]) - + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]) - (lir->operands[4] == 1 ? 1 : 0); case kShiftRegCl: // lir operands - 0: reg, 1: cl DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[1])); // Note: ShiftRegCl form passes reg as reg but encodes it using base. - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0); + return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, 0); case kShiftMemCl: // lir operands - 0: base, 1: disp, 2: cl - DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[2])); - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], - false, false, false, lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kShiftArrayCl: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cl - DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(lir->operands[4])); return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0], - true, false, false, lir->operands[3]); + lir->operands[3]); case kRegCond: // lir operands - 0: reg, 1: cond - return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG, - false, entry->skeleton.r8_form, false, 0); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], 0); case kMemCond: // lir operands - 0: base, 1: disp, 2: cond - DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false, - lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kArrayCond: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false, - lir->operands[3]); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]); case kRegRegCond: // lir operands - 0: reg1, 1: reg2, 2: cond - // Note: RegRegCond form passes reg2 as index but encodes it using base. DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, false, false, false, 0); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], 0); case kRegMemCond: // lir operands - 0: reg, 1: base, 2: disp, 3:cond DCHECK_EQ(false, entry->skeleton.r8_form); - return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], false, false, false, - lir->operands[2]); + return ComputeSize(entry, lir->operands[0], NO_REG, lir->operands[1], lir->operands[2]); case kJcc: if (lir->opcode == kX86Jcc8) { return 2; // opcode + rel8 @@ -717,7 +754,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { return 5; // opcode + rel32 } else if (lir->opcode == kX86JmpT) { // Thread displacement size is always 32bit. 
- return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678); + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678); } else { DCHECK(lir->opcode == kX86JmpR); if (NeedsRex(lir->operands[0])) { @@ -731,14 +768,12 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kX86CallI: return 5; // opcode 0:disp case kX86CallR: return 2; // opcode modrm case kX86CallM: // lir operands - 0: base, 1: disp - return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], false, false, false, - lir->operands[1]); + return ComputeSize(entry, NO_REG, NO_REG, lir->operands[0], lir->operands[1]); case kX86CallA: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], true, false, false, - lir->operands[3]); + return ComputeSize(entry, NO_REG, lir->operands[1], lir->operands[0], lir->operands[3]); case kX86CallT: // lir operands - 0: disp // Thread displacement size is always 32bit. - return ComputeSize(entry, NO_REG, NO_REG, NO_REG, false, false, false, 0x12345678); + return ComputeSize(entry, NO_REG, NO_REG, NO_REG, 0x12345678); default: break; } @@ -748,7 +783,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table // Force the displacement size to 32bit, it will hold a computed offset later. return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1], - true, false, false, 0x12345678); + 0x12345678); } else { DCHECK_EQ(entry->opcode, kX86PcRelAdr); return 5; // opcode with reg + 4 byte immediate @@ -757,7 +792,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], - lir->operands[0], NO_REG, NO_REG, false, false, false, 0) - + lir->operands[0], NO_REG, NO_REG, 0) - // Shorter ax encoding. (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); case kUnimplemented: @@ -801,8 +836,7 @@ void X86Mir2Lir::CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw } void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, - int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b, - bool r8_form) { + int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) { // REX.WRXB // W - 64-bit operand // R - MODRM.reg @@ -812,9 +846,17 @@ void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, bool r = NeedsRex(raw_reg_r); bool x = NeedsRex(raw_reg_x); bool b = NeedsRex(raw_reg_b); + bool r8_form = entry->skeleton.r8_form; + bool modrm_is_reg_reg = ModrmIsRegReg(entry); + uint8_t rex = 0; - if (r8_form && RegStorage::RegNum(raw_reg_r) > 4) { - rex |= 0x40; // REX.0000 + if (r8_form) { + // Do we need an empty REX prefix to normalize byte register addressing? 
+ if (RegStorage::RegNum(raw_reg_r) >= 4) { + rex |= 0x40; // REX.0000 + } else if (modrm_is_reg_reg && RegStorage::RegNum(raw_reg_b) >= 4) { + rex |= 0x40; // REX.0000 + } } if (w) { rex |= 0x48; // REX.W000 @@ -875,9 +917,8 @@ void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) { } void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry, - int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b, - bool r8_form) { - EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b, r8_form); + int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b) { + EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b); EmitOpcode(entry); } @@ -971,7 +1012,7 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int64_t imm) { void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); @@ -979,7 +1020,7 @@ void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) { void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg); // There's no 3-byte instruction with +rd DCHECK(entry->skeleton.opcode != 0x0F || (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A)); @@ -992,7 +1033,7 @@ void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) { void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) { CheckValidByteRegister(entry, raw_reg); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg); uint8_t low_reg = LowRegisterBits(raw_reg); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; code_buffer_.push_back(modrm); @@ -1002,7 +1043,7 @@ void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) { void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, NO_REG, NO_REG, raw_base, false); + EmitPrefix(entry, NO_REG, NO_REG, raw_base); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1016,7 +1057,7 @@ void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_ void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false); + EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base); uint8_t low_index = LowRegisterBits(raw_index); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp); @@ -1027,7 +1068,7 @@ void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int3 void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t raw_reg) { CheckValidByteRegister(entry, raw_reg); - EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base); uint8_t low_reg = LowRegisterBits(raw_reg); uint8_t low_base = 
LowRegisterBits(raw_base); EmitModrmDisp(low_reg, low_base, disp); @@ -1045,7 +1086,7 @@ void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, int32_t raw_reg, int32_ void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, int32_t raw_index, int scale, int32_t disp) { CheckValidByteRegister(entry, raw_reg); - EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base); uint8_t low_reg = LowRegisterBits(raw_reg); uint8_t low_index = LowRegisterBits(raw_index); uint8_t low_base = LowRegisterBits(raw_base); @@ -1064,7 +1105,7 @@ void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* entry, int32_t raw_base, int void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); @@ -1075,7 +1116,7 @@ void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index, int scale, int32_t disp, int32_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false); + EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base); uint8_t low_index = LowRegisterBits(raw_index); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp); @@ -1086,7 +1127,7 @@ void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_NE(entry->skeleton.prefix1, 0); - EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG, false); + EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG); uint8_t low_reg = LowRegisterBits(raw_reg); EmitModrmThread(low_reg); code_buffer_.push_back(disp & 0xFF); @@ -1101,7 +1142,7 @@ void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) { CheckValidByteRegister(entry, raw_reg1); CheckValidByteRegister(entry, raw_reg2); - EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2); uint8_t low_reg1 = LowRegisterBits(raw_reg1); uint8_t low_reg2 = LowRegisterBits(raw_reg2); uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2; @@ -1114,7 +1155,7 @@ void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32 void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2, int32_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2); uint8_t low_reg1 = LowRegisterBits(raw_reg1); uint8_t low_reg2 = LowRegisterBits(raw_reg2); uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2; @@ -1128,7 +1169,7 @@ void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base, int disp, int32_t imm) { DCHECK(!RegStorage::IsFloat(raw_reg)); CheckValidByteRegister(entry, raw_reg); - EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form); + 
EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base); uint8_t low_reg = LowRegisterBits(raw_reg); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmDisp(low_reg, low_base, disp); @@ -1145,7 +1186,7 @@ void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry, void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) { CheckValidByteRegister(entry, raw_reg); - EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); if (RegStorage::RegNum(raw_reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) { code_buffer_.push_back(entry->skeleton.ax_opcode); } else { @@ -1158,7 +1199,8 @@ void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_ } void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm) { - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false); + DCHECK_EQ(false, entry->skeleton.r8_form); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); EmitModrmThread(entry->skeleton.modrm_opcode); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1170,7 +1212,7 @@ void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_ void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, NO_REG, NO_REG, raw_reg, false); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); uint8_t low_reg = LowRegisterBits(raw_reg); code_buffer_.push_back(0xB8 + low_reg); switch (entry->skeleton.immediate_bytes) { @@ -1198,7 +1240,7 @@ void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) { CheckValidByteRegister(entry, raw_reg); - EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1221,7 +1263,7 @@ void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, i void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl) { CheckValidByteRegister(entry, raw_reg); DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl)); - EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1237,7 +1279,7 @@ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, int32_t displacement, int32_t raw_cl) { DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl)); - EmitPrefix(entry, NO_REG, NO_REG, raw_base, false); + EmitPrefix(entry, NO_REG, NO_REG, raw_base); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1251,7 +1293,7 @@ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, int32_t raw_base, void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp, int32_t imm) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, NO_REG, NO_REG, raw_base, false); + EmitPrefix(entry, NO_REG, NO_REG, raw_base); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1272,7 +1314,7 @@ void X86Mir2Lir::EmitShiftMemImm(const 
X86EncodingMap* entry, int32_t raw_base, void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc) { CheckValidByteRegister(entry, raw_reg); - EmitPrefix(entry, raw_reg, NO_REG, NO_REG, entry->skeleton.r8_form); + EmitPrefix(entry, NO_REG, NO_REG, raw_reg); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0x0F, entry->skeleton.opcode); code_buffer_.push_back(0x0F); @@ -1315,7 +1357,7 @@ void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, int32_t raw_reg1, i int32_t cc) { // Generate prefix and opcode without the condition. DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2); // Now add the condition. The last byte of opcode is the one that receives it. DCHECK_GE(cc, 0); @@ -1341,7 +1383,7 @@ void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, int32_t raw_reg1, i int32_t disp, int32_t cc) { // Generate prefix and opcode without the condition. DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base, false); + EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base); // Now add the condition. The last byte of opcode is the one that receives it. DCHECK_GE(cc, 0); @@ -1376,7 +1418,7 @@ void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int32_t rel) { } else { DCHECK(entry->opcode == kX86JmpR); DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, NO_REG, NO_REG, rel, false); + EmitPrefix(entry, NO_REG, NO_REG, rel); code_buffer_.push_back(entry->skeleton.opcode); uint8_t low_reg = LowRegisterBits(rel); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg; @@ -1404,7 +1446,7 @@ void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int32_t rel, int32_t cc) { void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base); uint8_t low_base = LowRegisterBits(raw_base); EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); @@ -1413,7 +1455,7 @@ void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int3 void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); DCHECK_EQ(4, entry->skeleton.immediate_bytes); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1425,7 +1467,7 @@ void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) { void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int32_t disp) { DCHECK_EQ(false, entry->skeleton.r8_form); DCHECK_NE(entry->skeleton.prefix1, 0); - EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); EmitModrmThread(entry->skeleton.modrm_opcode); code_buffer_.push_back(disp & 0xFF); code_buffer_.push_back((disp >> 8) & 0xFF); @@ -1450,7 +1492,7 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t } if (entry->opcode == kX86PcRelLoadRA) { DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table, false); + EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table); code_buffer_.push_back(entry->skeleton.opcode); 
DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1479,7 +1521,7 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) { DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name; DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, raw_reg, NO_REG, NO_REG, false); + EmitPrefix(entry, raw_reg, NO_REG, NO_REG); code_buffer_.push_back(0xE8); // call +0 code_buffer_.push_back(0); code_buffer_.push_back(0); @@ -1496,7 +1538,7 @@ void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " " << BuildInsnString(entry->fmt, lir, 0); - for (int i = 0; i < GetInsnSize(lir); ++i) { + for (size_t i = 0; i < GetInsnSize(lir); ++i) { code_buffer_.push_back(0xCC); // push breakpoint instruction - int 3 } } @@ -1793,8 +1835,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitUnimplemented(entry, lir); break; } - CHECK_EQ(static_cast<size_t>(GetInsnSize(lir)), - code_buffer_.size() - starting_cbuf_size) + DCHECK_EQ(lir->flags.size, GetInsnSize(lir)); + CHECK_EQ(lir->flags.size, code_buffer_.size() - starting_cbuf_size) << "Instruction size mismatch for entry: " << X86Mir2Lir::EncodingMap[lir->opcode].name; } return res; diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 6ae553dab3..3540843705 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -121,7 +121,7 @@ class X86Mir2Lir : public Mir2Lir { std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); ResourceMask GetPCUseDefEncoding() const OVERRIDE; uint64_t GetTargetInstFlags(int opcode); - int GetInsnSize(LIR* lir); + size_t GetInsnSize(LIR* lir) OVERRIDE; bool IsUnconditionalBranch(LIR* lir); // Check support for volatile load/store of a given size. @@ -392,15 +392,13 @@ class X86Mir2Lir : public Mir2Lir { protected: size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index, - int32_t raw_base, bool has_sib, bool r8_form, bool r8_reg_reg_form, - int32_t displacement); + int32_t raw_base, int32_t displacement); void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg); void EmitPrefix(const X86EncodingMap* entry, - int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b, - bool r8_form); + int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b); void EmitOpcode(const X86EncodingMap* entry); void EmitPrefixAndOpcode(const X86EncodingMap* entry, - int32_t reg_r, int32_t reg_x, int32_t reg_b, bool r8_form); + int32_t reg_r, int32_t reg_x, int32_t reg_b); void EmitDisp(uint8_t base, int32_t disp); void EmitModrmThread(uint8_t reg_or_opcode); void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp); @@ -464,6 +462,12 @@ class X86Mir2Lir : public Mir2Lir { virtual RegStorage AllocateByteRegister(); /* + * @brief Check if a register is byte addressable. + * @returns true if a register is byte addressable. + */ + bool IsByteRegister(RegStorage reg); + + /* * @brief generate inline code for fast case of Strng.indexOf. * @param info Call parameters * @param zero_based 'true' if the index into the string is 0. 
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index ced64009e6..f6f06170bb 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -381,7 +381,7 @@ void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest, branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); } // If the result reg can't be byte accessed, use a jump and move instead of a set. - if (rl_result.reg.GetReg() >= rs_rX86_SP.GetReg()) { + if (!IsByteRegister(rl_result.reg)) { LIR* branch2 = NULL; if (unordered_gt) { branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 4a77df2198..05b5e4354d 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -882,10 +882,9 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage result_reg = rl_result.reg; - // SETcc only works with EAX..EDX. - if (result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) { + // For 32-bit, SETcc only works with EAX..EDX. + if (!IsByteRegister(result_reg)) { result_reg = AllocateByteRegister(); - DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum()); } NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ); NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg()); @@ -1386,9 +1385,9 @@ void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, if (!Gen64Bit()) { x86op = GetOpcode(op, rl_dest, rl_src, true); lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET); + AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, + true /* is_load */, true /* is64bit */); } - AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, - true /* is_load */, true /* is64bit */); } void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { @@ -1423,11 +1422,11 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instructi if (!Gen64Bit()) { x86op = GetOpcode(op, rl_dest, rl_src, true); lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg()); + AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, + true /* is_load */, true /* is64bit */); + AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, + false /* is_load */, true /* is64bit */); } - AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, - true /* is_load */, true /* is64bit */); - AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, - false /* is_load */, true /* is64bit */); FreeTemp(rl_src.reg); } @@ -1760,8 +1759,7 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, rl_src = LoadValue(rl_src, reg_class); } // If the src reg can't be byte accessed, move it to a temp first. - if ((size == kSignedByte || size == kUnsignedByte) && - rl_src.reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) { + if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) { RegStorage temp = AllocTemp(); OpRegCopy(temp, rl_src.reg); StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size); @@ -2240,10 +2238,9 @@ void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage result_reg = rl_result.reg; - // SETcc only works with EAX..EDX. 
- if (result_reg == object.reg || result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) { + // For 32-bit, SETcc only works with EAX..EDX. + if (result_reg == object.reg || !IsByteRegister(result_reg)) { result_reg = AllocateByteRegister(); - DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum()); } // Assume that there is no match. @@ -2355,7 +2352,7 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */ RegLocation rl_result = GetReturn(kRefReg); - // SETcc only works with EAX..EDX. + // For 32-bit, SETcc only works with EAX..EDX. DCHECK_LT(rl_result.reg.GetRegNum(), 4); // Is the class NULL? @@ -2655,6 +2652,7 @@ void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { Mir2Lir::GenIntToLong(rl_dest, rl_src); return; } + rl_src = UpdateLoc(rl_src); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (rl_src.location == kLocPhysReg) { NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index d1ba2398c5..483d8cf257 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -424,7 +424,15 @@ void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) { } RegStorage X86Mir2Lir::AllocateByteRegister() { - return AllocTypedTemp(false, kCoreReg); + RegStorage reg = AllocTypedTemp(false, kCoreReg); + if (!Gen64Bit()) { + DCHECK_LT(reg.GetRegNum(), rs_rX86_SP.GetRegNum()); + } + return reg; +} + +bool X86Mir2Lir::IsByteRegister(RegStorage reg) { + return Gen64Bit() || reg.GetRegNum() < rs_rX86_SP.GetRegNum(); } /* Clobber all regs that might be used by an external C call */ diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index c72e8cd1d9..b93e3e8833 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -138,6 +138,7 @@ LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { case kOpLsl: opcode = kX86Sal64RI; break; case kOpLsr: opcode = kX86Shr64RI; break; case kOpAsr: opcode = kX86Sar64RI; break; + case kOpCmp: opcode = byte_imm ? kX86Cmp64RI8 : kX86Cmp64RI; break; default: LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op; } @@ -505,7 +506,7 @@ LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r5sib_no_base /* base */, r_src.GetReg() /* index */, value /* scale */, 0 /* disp */); } else if (op == kOpAdd) { // lea add special case - return NewLIR5(Gen64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + return NewLIR5(r_dest.Is64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), r_src.GetReg() /* base */, rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, 0 /* scale */, value /* disp */); } diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 43243254f1..e26745ad5e 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -117,6 +117,16 @@ void MIRGraph::ComputeDFSOrders() { RecordDFSOrders(GetEntryBlock()); num_reachable_blocks_ = dfs_order_->Size(); + + if (num_reachable_blocks_ != num_blocks_) { + // Hide all unreachable blocks. 
+ AllNodesIterator iter(this); + for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { + if (!bb->visited) { + bb->Hide(cu_); + } + } + } } /* diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 16c1e00c83..3e326f0633 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1918,7 +1918,7 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t } } uint64_t duration_ns = NanoTime() - start_ns; - if (duration_ns > MsToNs(compiler_->GetMaximumCompilationTimeBeforeWarning())) { + if (duration_ns > MsToNs(compiler_->GetMaximumCompilationTimeBeforeWarning()) && !kIsDebugBuild) { LOG(WARNING) << "Compilation of " << PrettyMethod(method_idx, dex_file) << " took " << PrettyDuration(duration_ns); } diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index fad67983dd..9903421338 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -28,6 +28,7 @@ #include "compiled_method.h" #include "compiler.h" #include "dex_file.h" +#include "driver/compiler_options.h" #include "instruction_set.h" #include "invoke_type.h" #include "method_reference.h" @@ -105,8 +106,7 @@ class CompilerDriver { InstructionSetFeatures instruction_set_features, bool image, DescriptorSet* image_classes, size_t thread_count, bool dump_stats, bool dump_passes, - CumulativeLogger* timer, - std::string profile_file = ""); + CumulativeLogger* timer, std::string profile_file = ""); ~CompilerDriver(); @@ -394,6 +394,10 @@ class CompilerDriver { return dump_passes_; } + bool DidIncludeDebugSymbols() const { + return compiler_options_->GetIncludeDebugSymbols(); + } + CumulativeLogger* GetTimingsLogger() const { return timings_logger_; } diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 05a9ac7ee9..5d1c5dadaf 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -42,6 +42,7 @@ class CompilerOptions { static const size_t kDefaultTinyMethodThreshold = 20; static const size_t kDefaultNumDexMethodsThreshold = 900; static constexpr double kDefaultTopKProfileThreshold = 90.0; + static const bool kDefaultIncludeDebugSymbols = kIsDebugBuild; CompilerOptions() : compiler_filter_(kDefaultCompilerFilter), @@ -51,7 +52,8 @@ class CompilerOptions { tiny_method_threshold_(kDefaultTinyMethodThreshold), num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold), generate_gdb_information_(false), - top_k_profile_threshold_(kDefaultTopKProfileThreshold) + top_k_profile_threshold_(kDefaultTopKProfileThreshold), + include_debug_symbols_(kDefaultIncludeDebugSymbols) #ifdef ART_SEA_IR_MODE , sea_ir_mode_(false) #endif @@ -64,7 +66,8 @@ class CompilerOptions { size_t tiny_method_threshold, size_t num_dex_methods_threshold, bool generate_gdb_information, - double top_k_profile_threshold + double top_k_profile_threshold, + bool include_debug_symbols #ifdef ART_SEA_IR_MODE , bool sea_ir_mode #endif @@ -76,7 +79,8 @@ class CompilerOptions { tiny_method_threshold_(tiny_method_threshold), num_dex_methods_threshold_(num_dex_methods_threshold), generate_gdb_information_(generate_gdb_information), - top_k_profile_threshold_(top_k_profile_threshold) + top_k_profile_threshold_(top_k_profile_threshold), + include_debug_symbols_(include_debug_symbols) #ifdef ART_SEA_IR_MODE , sea_ir_mode_(sea_ir_mode) #endif @@ -139,6 +143,10 @@ class CompilerOptions { return top_k_profile_threshold_; } + bool GetIncludeDebugSymbols() const 
{ + return include_debug_symbols_; + } + #ifdef ART_SEA_IR_MODE bool GetSeaIrMode(); #endif @@ -157,6 +165,7 @@ class CompilerOptions { bool generate_gdb_information_; // When using a profile file only the top K% of the profiled samples will be compiled. double top_k_profile_threshold_; + bool include_debug_symbols_; #ifdef ART_SEA_IR_MODE bool sea_ir_mode_; #endif diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index cb66e4898a..78757ecfe2 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -807,12 +807,17 @@ bool ElfWriterQuick::Write(OatWriter* oat_writer, const std::string& android_root_unused, bool is_host_unused) { const bool debug = false; + const bool add_symbols = oat_writer->DidAddSymbols(); const OatHeader& oat_header = oat_writer->GetOatHeader(); Elf32_Word oat_data_size = oat_header.GetExecutableOffset(); uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size; ElfBuilder builder(oat_writer, elf_file_, compiler_driver_->GetInstructionSet(), 0, - oat_data_size, oat_data_size, oat_exec_size, false, debug); + oat_data_size, oat_data_size, oat_exec_size, add_symbols, debug); + + if (add_symbols) { + AddDebugSymbols(builder, oat_writer, debug); + } bool generateDebugInformation = compiler_driver_->GetCallFrameInformation() != nullptr; if (generateDebugInformation) { @@ -833,6 +838,15 @@ bool ElfWriterQuick::Write(OatWriter* oat_writer, return builder.Write(); } +void ElfWriterQuick::AddDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer, bool debug) { + const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo(); + ElfSymtabBuilder* symtab = &builder.symtab_builder_; + for (auto it = method_info.begin(); it != method_info.end(); ++it) { + symtab->AddSymbol(it->method_name_, &builder.text_builder_, it->low_pc_, true, + it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); + } +} + static void UpdateWord(std::vector<uint8_t>*buf, int offset, int data) { (*buf)[offset+0] = data; (*buf)[offset+1] = data >> 8; diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h index f687d2e80b..dbdccfc200 100644 --- a/compiler/elf_writer_quick.h +++ b/compiler/elf_writer_quick.h @@ -48,6 +48,10 @@ class ElfWriterQuick FINAL : public ElfWriter { ~ElfWriterQuick() {} class ElfBuilder; + void AddDebugSymbols(ElfBuilder& builder, + OatWriter* oat_writer, + bool debug); + class ElfSectionBuilder { public: ElfSectionBuilder(const std::string& sec_name, Elf32_Word type, Elf32_Word flags, @@ -235,7 +239,6 @@ class ElfWriterQuick FINAL : public ElfWriter { ~ElfBuilder() {} bool Write(); - ElfSymtabBuilder* GetDefaultDynsymBuilder() { return &dynsym_builder_; } // Adds the given raw section to the builder. This will copy it. The caller // is responsible for deallocating their copy. diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 5d532abdd6..c6b9161b63 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -350,31 +350,14 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { uint32_t thumb_offset = compiled_method->CodeDelta(); quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset; - std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation(); - if (cfi_info != nullptr) { - // Copy in the FDE, if present - const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo(); - if (fde != nullptr) { - // Copy the information into cfi_info and then fix the address in the new copy. 
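Aside, to make the "fix the address in the new copy" step concrete: the FDE bytes are appended to the merged CFI buffer, and the initial_location word (the third 32-bit field of the FDE, hence cur_offset + 2*sizeof(uint32_t)) is then overwritten byte by byte, little-endian, with the method's code offset. A minimal standalone sketch of that byte-level patch, assuming a DWARF-style FDE layout; WriteLittleEndian32 is an illustrative helper, not an ART function:

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative helper: store a 32-bit value little-endian at byte offset `at`.
void WriteLittleEndian32(std::vector<uint8_t>* buf, size_t at, uint32_t value) {
  (*buf)[at + 0] = value & 0xff;
  (*buf)[at + 1] = (value >> 8) & 0xff;
  (*buf)[at + 2] = (value >> 16) & 0xff;
  (*buf)[at + 3] = (value >> 24) & 0xff;
}

int main() {
  // Pretend `cfi_info` already holds earlier FDEs and `fde` is the new one.
  std::vector<uint8_t> cfi_info(16, 0);
  std::vector<uint8_t> fde(24, 0);  // length, CIE pointer, initial_location, address_range, ...

  size_t cur_offset = cfi_info.size();
  cfi_info.insert(cfi_info.end(), fde.begin(), fde.end());

  // Point initial_location (third 32-bit field of the FDE) at the method's code.
  uint32_t quick_code_start = 0x2340;  // example: code offset minus executable offset
  WriteLittleEndian32(&cfi_info, cur_offset + 2 * sizeof(uint32_t), quick_code_start);

  printf("initial_location bytes: %02x %02x %02x %02x\n",
         cfi_info[cur_offset + 8], cfi_info[cur_offset + 9],
         cfi_info[cur_offset + 10], cfi_info[cur_offset + 11]);
  return 0;
}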
- int cur_offset = cfi_info->size(); - cfi_info->insert(cfi_info->end(), fde->begin(), fde->end()); - - // Set the 'initial_location' field to address the start of the method. - uint32_t new_value = quick_code_offset - writer_->oat_header_->GetExecutableOffset(); - uint32_t offset_to_update = cur_offset + 2*sizeof(uint32_t); - (*cfi_info)[offset_to_update+0] = new_value; - (*cfi_info)[offset_to_update+1] = new_value >> 8; - (*cfi_info)[offset_to_update+2] = new_value >> 16; - (*cfi_info)[offset_to_update+3] = new_value >> 24; - std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, false); - writer_->method_info_.push_back(DebugInfo(name, new_value, new_value + code_size)); - } - } + bool force_debug_capture = false; + bool deduped = false; // Deduplicate code arrays. auto code_iter = dedupe_map_.find(compiled_method); if (code_iter != dedupe_map_.end()) { quick_code_offset = code_iter->second; + deduped = true; } else { dedupe_map_.Put(compiled_method, quick_code_offset); } @@ -409,6 +392,41 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { writer_->oat_header_->UpdateChecksum(&(*quick_code)[0], code_size); offset_ += code_size; } + + uint32_t quick_code_start = quick_code_offset - writer_->oat_header_->GetExecutableOffset(); + std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation(); + if (cfi_info != nullptr) { + // Copy in the FDE, if present + const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo(); + if (fde != nullptr) { + // Copy the information into cfi_info and then fix the address in the new copy. + int cur_offset = cfi_info->size(); + cfi_info->insert(cfi_info->end(), fde->begin(), fde->end()); + + // Set the 'initial_location' field to address the start of the method. + uint32_t offset_to_update = cur_offset + 2*sizeof(uint32_t); + (*cfi_info)[offset_to_update+0] = quick_code_start; + (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; + (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; + (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; + force_debug_capture = true; + } + } + + + if (writer_->compiler_driver_->DidIncludeDebugSymbols() || force_debug_capture) { + // Record debug information for this function if we are doing that or + // we have CFI and so need it. + std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true); + if (deduped) { + // TODO We should place the DEDUPED tag on the first instance of a + // deduplicated symbol so that it will show up in a debuggerd crash + // report. + name += " [ DEDUPED ]"; + } + writer_->method_info_.push_back(DebugInfo(name, quick_code_start, + quick_code_start + code_size)); + } } if (kIsDebugBuild) { @@ -517,7 +535,7 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { NullHandle<mirror::ClassLoader>(), NullHandle<mirror::ArtMethod>(), invoke_type); - CHECK(method != NULL); + CHECK(method != NULL) << PrettyMethod(it.GetMemberIndex(), *dex_file_, true); // Portable code offsets are set by ElfWriterMclinker::FixupCompiledCodeOffset after linking. 
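The InitCodeMethodVisitor hunk above records a DebugInfo entry whenever debug symbols are requested or CFI needs one, tagging duplicated code with " [ DEDUPED ]"; elf_writer_quick.cc then turns each record into one function symbol whose value is the start offset and whose size is high_pc - low_pc. A rough illustration of that mapping under those assumptions — the DebugInfo and FuncSymbol structs below are simplified stand-ins, not the real OatWriter/ElfBuilder types:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-in for OatWriter::DebugInfo.
struct DebugInfo {
  std::string method_name;
  uint32_t low_pc;   // offset of the first instruction
  uint32_t high_pc;  // offset just past the last instruction
};

// Simplified stand-in for one STT_FUNC symbol table entry.
struct FuncSymbol {
  std::string name;
  uint32_t value;  // == low_pc
  uint32_t size;   // == high_pc - low_pc
};

std::vector<FuncSymbol> BuildFunctionSymbols(const std::vector<DebugInfo>& methods) {
  std::vector<FuncSymbol> symbols;
  for (const DebugInfo& info : methods) {
    symbols.push_back(FuncSymbol{info.method_name, info.low_pc, info.high_pc - info.low_pc});
  }
  return symbols;
}

int main() {
  std::vector<DebugInfo> methods = {
      {"void Foo.bar()", 0x1000, 0x1040},
      {"int Foo.baz(int) [ DEDUPED ]", 0x1040, 0x1080},
  };
  for (const FuncSymbol& sym : BuildFunctionSymbols(methods)) {
    std::cout << sym.name << " value=0x" << std::hex << sym.value
              << " size=0x" << sym.size << "\n";
  }
  return 0;
}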
method->SetQuickOatCodeOffset(offsets.code_offset_); method->SetOatNativeGcMapOffset(offsets.gc_map_offset_); diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 8c20aa8d06..dbecb95362 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -108,6 +108,10 @@ class OatWriter { return method_info_; } + bool DidAddSymbols() const { + return compiler_driver_->DidIncludeDebugSymbols(); + } + private: // The DataAccess classes are helper classes that provide access to members related to // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index f05cb66aba..b8332ad2a3 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -18,6 +18,7 @@ #include "code_generator_arm.h" #include "code_generator_x86.h" +#include "code_generator_x86_64.h" #include "dex/verified_method.h" #include "driver/dex_compilation_unit.h" #include "gc_map_builder.h" @@ -221,7 +222,7 @@ CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator, return new (allocator) x86::CodeGeneratorX86(graph); } case kX86_64: { - return new (allocator) x86::CodeGeneratorX86(graph); + return new (allocator) x86_64::CodeGeneratorX86_64(graph); } default: return nullptr; diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 82fa6393e0..83621e0f72 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -67,8 +67,7 @@ class CodeGenerator : public ArenaObject { // Note that this follows the current calling convention. return GetFrameSize() + kVRegSize // Art method - + (parameter->GetIndex() - graph_->GetNumberOfVRegs() + graph_->GetNumberOfInVRegs()) - * kVRegSize; + + parameter->GetIndex() * kVRegSize; } virtual void GenerateFrameEntry() = 0; @@ -158,10 +157,10 @@ class CallingConvention { return registers_[index]; } - uint8_t GetStackOffsetOf(size_t index, size_t word_size) const { + uint8_t GetStackOffsetOf(size_t index) const { // We still reserve the space for parameters passed by registers. - // Add word_size for the method pointer. - return index * kVRegSize + word_size; + // Add one for the method pointer. 
+ return (index + 1) * kVRegSize; } private: diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index d61df36ca9..212a6dc370 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -15,14 +15,14 @@ */ #include "code_generator_arm.h" -#include "utils/assembler.h" -#include "utils/arm/assembler_arm.h" -#include "utils/arm/managed_register_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/array.h" #include "mirror/art_method.h" #include "thread.h" +#include "utils/assembler.h" +#include "utils/arm/assembler_arm.h" +#include "utils/arm/managed_register_arm.h" #define __ reinterpret_cast<ArmAssembler*>(GetAssembler())-> @@ -48,7 +48,8 @@ void CodeGeneratorARM::DumpFloatingPointRegister(std::ostream& stream, int reg) CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) : CodeGenerator(graph, kNumberOfRegIds), location_builder_(graph, this), - instruction_visitor_(graph, this) {} + instruction_visitor_(graph, this), + move_resolver_(graph->GetArena(), this) {} static bool* GetBlockedRegisterPairs(bool* blocked_registers) { return blocked_registers + kNumberOfAllocIds; @@ -106,6 +107,9 @@ void CodeGeneratorARM::SetupBlockedRegisters(bool* blocked_registers) const { // Reserve thread register. blocked_registers[TR] = true; + // Reserve temp register. + blocked_registers[IP] = true; + // TODO: We currently don't use Quick's callee saved registers. blocked_registers[R5] = true; blocked_registers[R6] = true; @@ -161,7 +165,7 @@ int32_t CodeGeneratorARM::GetStackSlot(HLocal* local) const { uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs(); if (reg_number >= number_of_vregs - number_of_in_vregs) { // Local is a parameter of the method. It is stored in the caller's frame. - return GetFrameSize() + kArmWordSize // ART method + return GetFrameSize() + kVRegSize // ART method + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize; } else { // Local is a temporary in this method. It is stored in this method's frame. 
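For the calling-convention change above: the old form index * kVRegSize + word_size and the new form (index + 1) * kVRegSize produce the same offsets whenever the target word size equals the virtual-register slot size, which holds for the 32-bit backends touched here (assuming kVRegSize, kArmWordSize and kX86WordSize are all 4 bytes; those values are an assumption of this sketch, not stated in the patch). A quick check of that arithmetic:

#include <cstddef>

// Assumed slot sizes for the 32-bit backends (illustrative values).
constexpr size_t kVRegSize = 4;
constexpr size_t kArmWordSize = 4;
constexpr size_t kX86WordSize = 4;

constexpr size_t OldStackOffsetOf(size_t index, size_t word_size) {
  return index * kVRegSize + word_size;  // pre-change form
}

constexpr size_t NewStackOffsetOf(size_t index) {
  return (index + 1) * kVRegSize;  // post-change form: one slot reserved for the method pointer
}

static_assert(OldStackOffsetOf(0, kArmWordSize) == NewStackOffsetOf(0), "arm, index 0");
static_assert(OldStackOffsetOf(3, kX86WordSize) == NewStackOffsetOf(3), "x86, index 3");

int main() { return 0; }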
@@ -210,7 +214,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type if (index < calling_convention.GetNumberOfRegisters()) { return ArmCoreLocation(calling_convention.GetRegisterAt(index)); } else { - return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize)); + return Location::StackSlot(calling_convention.GetStackOffsetOf(index)); } } @@ -223,7 +227,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } else if (index + 1 == calling_convention.GetNumberOfRegisters()) { return Location::QuickParameter(index); } else { - return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize)); + return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index)); } } @@ -254,8 +258,8 @@ void CodeGeneratorARM::Move32(Location destination, Location source) { if (source.IsRegister()) { __ str(source.AsArm().AsCoreRegister(), Address(SP, destination.GetStackIndex())); } else { - __ ldr(R0, Address(SP, source.GetStackIndex())); - __ str(R0, Address(SP, destination.GetStackIndex())); + __ ldr(IP, Address(SP, source.GetStackIndex())); + __ str(IP, Address(SP, destination.GetStackIndex())); } } } @@ -274,7 +278,7 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { __ Mov(destination.AsArm().AsRegisterPairLow(), calling_convention.GetRegisterAt(argument_index)); __ ldr(destination.AsArm().AsRegisterPairHigh(), - Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize())); + Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize())); } else { DCHECK(source.IsDoubleStackSlot()); if (destination.AsArm().AsRegisterPair() == R1_R2) { @@ -291,12 +295,12 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { if (source.IsRegister()) { __ Mov(calling_convention.GetRegisterAt(argument_index), source.AsArm().AsRegisterPairLow()); __ str(source.AsArm().AsRegisterPairHigh(), - Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize))); + Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1))); } else { DCHECK(source.IsDoubleStackSlot()); __ ldr(calling_convention.GetRegisterAt(argument_index), Address(SP, source.GetStackIndex())); __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize))); - __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize))); + __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1))); } } else { DCHECK(destination.IsDoubleStackSlot()); @@ -314,14 +318,14 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { __ str(calling_convention.GetRegisterAt(argument_index), Address(SP, destination.GetStackIndex())); __ ldr(R0, - Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize())); + Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize())); __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize))); } else { DCHECK(source.IsDoubleStackSlot()); - __ ldr(R0, Address(SP, source.GetStackIndex())); - __ str(R0, Address(SP, destination.GetStackIndex())); - __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize))); - __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize))); + __ ldr(IP, Address(SP, source.GetStackIndex())); + __ str(IP, Address(SP, destination.GetStackIndex())); + __ ldr(IP, Address(SP, source.GetHighStackIndex(kArmWordSize))); + __ 
str(IP, Address(SP, destination.GetHighStackIndex(kArmWordSize))); } } } @@ -332,8 +336,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr if (location.IsRegister()) { __ LoadImmediate(location.AsArm().AsCoreRegister(), value); } else { - __ LoadImmediate(R0, value); - __ str(R0, Address(SP, location.GetStackIndex())); + __ LoadImmediate(IP, value); + __ str(IP, Address(SP, location.GetStackIndex())); } } else if (instruction->AsLongConstant() != nullptr) { int64_t value = instruction->AsLongConstant()->GetValue(); @@ -341,10 +345,10 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr __ LoadImmediate(location.AsArm().AsRegisterPairLow(), Low32Bits(value)); __ LoadImmediate(location.AsArm().AsRegisterPairHigh(), High32Bits(value)); } else { - __ LoadImmediate(R0, Low32Bits(value)); - __ str(R0, Address(SP, location.GetStackIndex())); - __ LoadImmediate(R0, High32Bits(value)); - __ str(R0, Address(SP, location.GetHighStackIndex(kArmWordSize))); + __ LoadImmediate(IP, Low32Bits(value)); + __ str(IP, Address(SP, location.GetStackIndex())); + __ LoadImmediate(IP, High32Bits(value)); + __ str(IP, Address(SP, location.GetHighStackIndex(kArmWordSize))); } } else if (instruction->AsLoadLocal() != nullptr) { uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal()); @@ -493,7 +497,7 @@ void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) { } void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) { - // Will be generated at use site. + codegen_->Move(constant, constant->GetLocations()->Out(), nullptr); } void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) { @@ -564,7 +568,7 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke); - locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(ArmCoreLocation(R0)); InvokeDexCallingConventionVisitor calling_convention_visitor; for (size_t i = 0; i < invoke->InputCount(); i++) { @@ -811,15 +815,93 @@ void LocationsBuilderARM::VisitPhi(HPhi* instruction) { } void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unreachable"; } void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unreachable"; } void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction) { - LOG(FATAL) << "Unimplemented"; + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +ArmAssembler* ParallelMoveResolverARM::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverARM::EmitMove(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister()) { + if (destination.IsRegister()) { + __ Mov(destination.AsArm().AsCoreRegister(), source.AsArm().AsCoreRegister()); + } else { + DCHECK(destination.IsStackSlot()); + __ StoreToOffset(kStoreWord, source.AsArm().AsCoreRegister(), + SP, destination.GetStackIndex()); + } + } else if (source.IsStackSlot()) { + if (destination.IsRegister()) { + __ LoadFromOffset(kLoadWord, destination.AsArm().AsCoreRegister(), + SP, source.GetStackIndex()); + } else { + DCHECK(destination.IsStackSlot()); + __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); + 
__ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + } + } else { + LOG(FATAL) << "Unimplemented"; + } +} + +void ParallelMoveResolverARM::Exchange(Register reg, int mem) { + __ Mov(IP, reg); + __ LoadFromOffset(kLoadWord, reg, SP, mem); + __ StoreToOffset(kStoreWord, IP, SP, mem); +} + +void ParallelMoveResolverARM::Exchange(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch(this, IP, R0, codegen_->GetNumberOfCoreRegisters()); + int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0; + __ LoadFromOffset(kLoadWord, static_cast<Register>(ensure_scratch.GetRegister()), + SP, mem1 + stack_offset); + __ LoadFromOffset(kLoadWord, IP, SP, mem2 + stack_offset); + __ StoreToOffset(kStoreWord, static_cast<Register>(ensure_scratch.GetRegister()), + SP, mem2 + stack_offset); + __ StoreToOffset(kStoreWord, IP, SP, mem1 + stack_offset); +} + +void ParallelMoveResolverARM::EmitSwap(size_t index) { + MoveOperands* move = moves_.Get(index); + Location source = move->GetSource(); + Location destination = move->GetDestination(); + + if (source.IsRegister() && destination.IsRegister()) { + DCHECK_NE(source.AsArm().AsCoreRegister(), IP); + DCHECK_NE(destination.AsArm().AsCoreRegister(), IP); + __ Mov(IP, source.AsArm().AsCoreRegister()); + __ Mov(source.AsArm().AsCoreRegister(), destination.AsArm().AsCoreRegister()); + __ Mov(destination.AsArm().AsCoreRegister(), IP); + } else if (source.IsRegister() && destination.IsStackSlot()) { + Exchange(source.AsArm().AsCoreRegister(), destination.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsRegister()) { + Exchange(destination.AsArm().AsCoreRegister(), source.GetStackIndex()); + } else if (source.IsStackSlot() && destination.IsStackSlot()) { + Exchange(source.GetStackIndex(), destination.GetStackIndex()); + } else { + LOG(FATAL) << "Unimplemented"; + } +} + +void ParallelMoveResolverARM::SpillScratch(int reg) { + __ Push(static_cast<Register>(reg)); +} + +void ParallelMoveResolverARM::RestoreScratch(int reg) { + __ Pop(static_cast<Register>(reg)); } } // namespace arm diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index ac5ef212ba..712a24cf67 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -19,6 +19,7 @@ #include "code_generator.h" #include "nodes.h" +#include "parallel_move_resolver.h" #include "utils/arm/assembler_arm32.h" namespace art { @@ -59,6 +60,27 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; +class ParallelMoveResolverARM : public ParallelMoveResolver { + public: + ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen) + : ParallelMoveResolver(allocator), codegen_(codegen) {} + + virtual void EmitMove(size_t index) OVERRIDE; + virtual void EmitSwap(size_t index) OVERRIDE; + virtual void SpillScratch(int reg) OVERRIDE; + virtual void RestoreScratch(int reg) OVERRIDE; + + ArmAssembler* GetAssembler() const; + + private: + void Exchange(Register reg, int mem); + void Exchange(int mem1, int mem2); + + CodeGeneratorARM* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM); +}; + class LocationsBuilderARM : public HGraphVisitor { public: explicit LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen) @@ -145,6 +167,10 @@ class CodeGeneratorARM : public CodeGenerator { virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; virtual void DumpFloatingPointRegister(std::ostream& 
stream, int reg) const OVERRIDE; + ParallelMoveResolverARM* GetMoveResolver() { + return &move_resolver_; + } + private: // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); @@ -153,6 +179,7 @@ class CodeGeneratorARM : public CodeGenerator { LocationsBuilderARM location_builder_; InstructionCodeGeneratorARM instruction_visitor_; + ParallelMoveResolverARM move_resolver_; Arm32Assembler assembler_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index c7dca86dab..342a191a47 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -157,7 +157,7 @@ int32_t CodeGeneratorX86::GetStackSlot(HLocal* local) const { uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs(); if (reg_number >= number_of_vregs - number_of_in_vregs) { // Local is a parameter of the method. It is stored in the caller's frame. - return GetFrameSize() + kX86WordSize // ART method + return GetFrameSize() + kVRegSize // ART method + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize; } else { // Local is a temporary in this method. It is stored in this method's frame. @@ -221,7 +221,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type if (index < calling_convention.GetNumberOfRegisters()) { return X86CpuLocation(calling_convention.GetRegisterAt(index)); } else { - return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize)); + return Location::StackSlot(calling_convention.GetStackOffsetOf(index)); } } @@ -234,7 +234,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } else if (index + 1 == calling_convention.GetNumberOfRegisters()) { return Location::QuickParameter(index); } else { - return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize)); + return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index)); } } @@ -286,7 +286,7 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { __ movl(destination.AsX86().AsRegisterPairLow(), calling_convention.GetRegisterAt(argument_index)); __ movl(destination.AsX86().AsRegisterPairHigh(), Address(ESP, - calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize())); + calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize())); } else { DCHECK(source.IsDoubleStackSlot()); __ movl(destination.AsX86().AsRegisterPairLow(), Address(ESP, source.GetStackIndex())); @@ -298,14 +298,14 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { uint32_t argument_index = destination.GetQuickParameterIndex(); if (source.IsRegister()) { __ movl(calling_convention.GetRegisterAt(argument_index), source.AsX86().AsRegisterPairLow()); - __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)), + __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)), source.AsX86().AsRegisterPairHigh()); } else { DCHECK(source.IsDoubleStackSlot()); __ movl(calling_convention.GetRegisterAt(argument_index), Address(ESP, source.GetStackIndex())); __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize))); - __ popl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize))); + __ popl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1))); } } else { if (source.IsRegister()) { @@ -318,7 +318,7 
@@ void CodeGeneratorX86::Move64(Location destination, Location source) { __ movl(Address(ESP, destination.GetStackIndex()), calling_convention.GetRegisterAt(argument_index)); __ pushl(Address(ESP, - calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize())); + calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize())); __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize))); } else { DCHECK(source.IsDoubleStackSlot()); @@ -847,7 +847,7 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const { void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src) { ScratchRegisterScope ensure_scratch( - this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, src + stack_offset)); __ movl(Address(ESP, dst + stack_offset), static_cast<Register>(ensure_scratch.GetRegister())); @@ -879,7 +879,10 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } void ParallelMoveResolverX86::Exchange(Register reg, int mem) { - ScratchRegisterScope ensure_scratch(this, reg, codegen_->GetNumberOfCoreRegisters()); + Register suggested_scratch = reg == EAX ? EBX : EAX; + ScratchRegisterScope ensure_scratch( + this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters()); + int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset)); __ movl(Address(ESP, mem + stack_offset), reg); @@ -889,9 +892,12 @@ void ParallelMoveResolverX86::Exchange(Register reg, int mem) { void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { ScratchRegisterScope ensure_scratch1( - this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); + + Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX; ScratchRegisterScope ensure_scratch2( - this, ensure_scratch1.GetRegister(), codegen_->GetNumberOfCoreRegisters()); + this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters()); + int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0; __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc new file mode 100644 index 0000000000..ef17ca73df --- /dev/null +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -0,0 +1,708 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "code_generator_x86_64.h" + +#include "entrypoints/quick/quick_entrypoints.h" +#include "mirror/array.h" +#include "mirror/art_method.h" +#include "mirror/object_reference.h" +#include "thread.h" +#include "utils/assembler.h" +#include "utils/x86_64/assembler_x86_64.h" +#include "utils/x86_64/managed_register_x86_64.h" + +#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())-> + +namespace art { + +x86_64::X86_64ManagedRegister Location::AsX86_64() const { + return reg().AsX86_64(); +} + +namespace x86_64 { + +static constexpr int kNumberOfPushedRegistersAtEntry = 1; +static constexpr int kCurrentMethodStackOffset = 0; + +void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { + stream << X86_64ManagedRegister::FromCpuRegister(Register(reg)); +} + +void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { + stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg)); +} + +static Location X86_64CpuLocation(Register reg) { + return Location::RegisterLocation(X86_64ManagedRegister::FromCpuRegister(reg)); +} + +CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph) + : CodeGenerator(graph, kNumberOfRegIds), + location_builder_(graph, this), + instruction_visitor_(graph, this) {} + +InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) + : HGraphVisitor(graph), + assembler_(codegen->GetAssembler()), + codegen_(codegen) {} + +ManagedRegister CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type, + bool* blocked_registers) const { + switch (type) { + case Primitive::kPrimLong: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters); + return X86_64ManagedRegister::FromCpuRegister(static_cast<Register>(reg)); + } + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented register type " << type; + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << type; + } + + return ManagedRegister::NoRegister(); +} + +void CodeGeneratorX86_64::SetupBlockedRegisters(bool* blocked_registers) const { + // Stack register is always reserved. + blocked_registers[RSP] = true; + + // TODO: We currently don't use Quick's callee saved registers. + blocked_registers[RBX] = true; + blocked_registers[RBP] = true; + blocked_registers[R12] = true; + blocked_registers[R13] = true; + blocked_registers[R14] = true; + blocked_registers[R15] = true; +} + +void CodeGeneratorX86_64::ComputeFrameSize(size_t number_of_spill_slots) { + // Add the current ART method to the frame size, the return PC, and the filler. + SetFrameSize(RoundUp( + number_of_spill_slots * kVRegSize + + kVRegSize // filler + + kVRegSize // Art method + + kNumberOfPushedRegistersAtEntry * kX86_64WordSize, + kStackAlignment)); +} + +void CodeGeneratorX86_64::GenerateFrameEntry() { + // Create a fake register to mimic Quick. + static const int kFakeReturnRegister = 16; + core_spill_mask_ |= (1 << kFakeReturnRegister); + + // The return PC has already been pushed on the stack. 
+ __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize)); + __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI)); +} + +void CodeGeneratorX86_64::GenerateFrameExit() { + __ addq(CpuRegister(RSP), + Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize)); +} + +void CodeGeneratorX86_64::Bind(Label* label) { + __ Bind(label); +} + +void InstructionCodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { + __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); +} + +int32_t CodeGeneratorX86_64::GetStackSlot(HLocal* local) const { + uint16_t reg_number = local->GetRegNumber(); + uint16_t number_of_vregs = GetGraph()->GetNumberOfVRegs(); + uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs(); + if (reg_number >= number_of_vregs - number_of_in_vregs) { + // Local is a parameter of the method. It is stored in the caller's frame. + return GetFrameSize() + kVRegSize // ART method + + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize; + } else { + // Local is a temporary in this method. It is stored in this method's frame. + return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86_64WordSize) + - kVRegSize + - (number_of_vregs * kVRegSize) + + (reg_number * kVRegSize); + } +} + +Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { + switch (load->GetType()) { + case Primitive::kPrimLong: + return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); + break; + + case Primitive::kPrimInt: + case Primitive::kPrimNot: + return Location::StackSlot(GetStackSlot(load->GetLocal())); + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + LOG(FATAL) << "Unimplemented type " << load->GetType(); + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type " << load->GetType(); + } + + LOG(FATAL) << "Unreachable"; + return Location(); +} + +void CodeGeneratorX86_64::Move(Location destination, Location source) { + if (source.Equals(destination)) { + return; + } + if (destination.IsRegister()) { + if (source.IsRegister()) { + __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); + } else if (source.IsStackSlot()) { + __ movl(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex())); + } else { + DCHECK(source.IsDoubleStackSlot()); + __ movq(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex())); + } + } else if (destination.IsStackSlot()) { + if (source.IsRegister()) { + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister()); + } else { + DCHECK(source.IsStackSlot()); + __ movl(CpuRegister(RAX), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(RAX)); + } + } else { + DCHECK(destination.IsDoubleStackSlot()); + if (source.IsRegister()) { + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister()); + } else { + DCHECK(source.IsDoubleStackSlot()); + __ movq(CpuRegister(RAX), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(RAX)); + } + } +} + +void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HInstruction* move_for) { + if (instruction->AsIntConstant() 
!= nullptr) { + Immediate imm(instruction->AsIntConstant()->GetValue()); + if (location.IsRegister()) { + __ movq(location.AsX86_64().AsCpuRegister(), imm); + } else { + __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm); + } + } else if (instruction->AsLongConstant() != nullptr) { + int64_t value = instruction->AsLongConstant()->GetValue(); + if (location.IsRegister()) { + __ movq(location.AsX86_64().AsCpuRegister(), Immediate(value)); + } else { + __ movq(CpuRegister(RAX), Immediate(value)); + __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(RAX)); + } + } else if (instruction->AsLoadLocal() != nullptr) { + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + Move(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal()))); + break; + + case Primitive::kPrimLong: + Move(location, Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal()))); + break; + + default: + LOG(FATAL) << "Unimplemented local type " << instruction->GetType(); + } + } else { + // This can currently only happen when the instruction that requests the move + // is the next to be compiled. + DCHECK_EQ(instruction->GetNext(), move_for); + switch (instruction->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + Move(location, instruction->GetLocations()->Out()); + break; + + default: + LOG(FATAL) << "Unimplemented type " << instruction->GetType(); + } + } +} + +void LocationsBuilderX86_64::VisitGoto(HGoto* got) { + got->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { + HBasicBlock* successor = got->GetSuccessor(); + if (GetGraph()->GetExitBlock() == successor) { + codegen_->GenerateFrameExit(); + } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { + __ jmp(codegen_->GetLabelOf(successor)); + } +} + +void LocationsBuilderX86_64::VisitExit(HExit* exit) { + exit->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { + if (kIsDebugBuild) { + __ Comment("Unreachable"); + __ int3(); + } +} + +void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + locations->SetInAt(0, X86_64CpuLocation(RAX)); + if_instr->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { + // TODO: Generate the input as a condition, instead of materializing in a register. + __ cmpl(if_instr->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(), Immediate(0)); + __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { + __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + } +} + +void LocationsBuilderX86_64::VisitLocal(HLocal* local) { + local->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitLocal(HLocal* local) { + DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock()); +} + +void LocationsBuilderX86_64::VisitLoadLocal(HLoadLocal* local) { + local->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load) { + // Nothing to do, this is driven by the code generator. 
+} + +void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store); + switch (store->InputAt(1)->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal()))); + break; + + case Primitive::kPrimLong: + locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal()))); + break; + + default: + LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType(); + } + store->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store) { +} + +void LocationsBuilderX86_64::VisitEqual(HEqual* equal) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal); + locations->SetInAt(0, X86_64CpuLocation(RAX)); + locations->SetInAt(1, X86_64CpuLocation(RCX)); + locations->SetOut(X86_64CpuLocation(RAX)); + equal->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* equal) { + __ cmpq(equal->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(), + equal->GetLocations()->InAt(1).AsX86_64().AsCpuRegister()); + __ setcc(kEqual, equal->GetLocations()->Out().AsX86_64().AsCpuRegister()); +} + +void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { + // TODO: Support constant locations. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::RequiresRegister()); + constant->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) { + // Will be generated at use site. +} + +void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { + // TODO: Support constant locations. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::RequiresRegister()); + constant->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) { + // Will be generated at use site. 
+} + +void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { + ret->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) { + codegen_->GenerateFrameExit(); + __ ret(); +} + +void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret); + switch (ret->InputAt(0)->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + locations->SetInAt(0, X86_64CpuLocation(RAX)); + break; + + default: + LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType(); + } + ret->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { + if (kIsDebugBuild) { + switch (ret->InputAt(0)->GetType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), RAX); + break; + + default: + LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType(); + } + } + codegen_->GenerateFrameExit(); + __ ret(); +} + +static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX }; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +class InvokeRuntimeCallingConvention : public CallingConvention<Register> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + +Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + uint32_t index = gp_index_++; + stack_index_++; + if (index < calling_convention.GetNumberOfRegisters()) { + return X86_64CpuLocation(calling_convention.GetRegisterAt(index)); + } else { + return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); + } + } + + case Primitive::kPrimLong: { + uint32_t index = gp_index_; + stack_index_ += 2; + if (index < calling_convention.GetNumberOfRegisters()) { + gp_index_ += 1; + return X86_64CpuLocation(calling_convention.GetRegisterAt(index)); + } else { + gp_index_ += 2; + return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); + } + } + + case Primitive::kPrimDouble: + case Primitive::kPrimFloat: + LOG(FATAL) << "Unimplemented parameter type " << type; + break; + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected parameter type " << type; + break; + } + return Location(); +} + +void LocationsBuilderX86_64::VisitInvokeStatic(HInvokeStatic* invoke) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke); + locations->AddTemp(X86_64CpuLocation(RDI)); + + InvokeDexCallingConventionVisitor calling_convention_visitor; + for (size_t i = 0; i < invoke->InputCount(); ++i) { + HInstruction* input = invoke->InputAt(i); + locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); + } + + switch (invoke->GetType()) { + case 
Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: + case Primitive::kPrimLong: + locations->SetOut(X86_64CpuLocation(RAX)); + break; + + case Primitive::kPrimVoid: + break; + + case Primitive::kPrimDouble: + case Primitive::kPrimFloat: + LOG(FATAL) << "Unimplemented return type " << invoke->GetType(); + break; + } + + invoke->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitInvokeStatic(HInvokeStatic* invoke) { + CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsX86_64().AsCpuRegister(); + uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>); + size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).SizeValue() + + invoke->GetIndexInDexCache() * heap_reference_size; + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. + + // temp = method; + LoadCurrentMethod(temp); + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, index_in_cache)); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue())); + + codegen_->RecordPcInfo(invoke->GetDexPc()); +} + +void LocationsBuilderX86_64::VisitAdd(HAdd* add) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add); + switch (add->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, X86_64CpuLocation(RAX)); + locations->SetInAt(1, X86_64CpuLocation(RCX)); + locations->SetOut(X86_64CpuLocation(RAX)); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected add type " << add->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented add type " << add->GetResultType(); + } + add->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { + LocationSummary* locations = add->GetLocations(); + switch (add->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), + locations->Out().AsX86_64().AsCpuRegister().AsRegister()); + __ addq(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected add type " << add->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented add type " << add->GetResultType(); + } +} + +void LocationsBuilderX86_64::VisitSub(HSub* sub) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub); + switch (sub->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, X86_64CpuLocation(RAX)); + locations->SetInAt(1, X86_64CpuLocation(RCX)); + locations->SetOut(X86_64CpuLocation(RAX)); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); + break; + + 
default: + LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType(); + } + sub->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { + LocationSummary* locations = sub->GetLocations(); + switch (sub->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), + locations->Out().AsX86_64().AsCpuRegister().AsRegister()); + __ subq(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType(); + } +} + +void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetOut(X86_64CpuLocation(RAX)); + instruction->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { + InvokeRuntimeCallingConvention calling_convention; + LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); + __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); + + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocObjectWithAccessCheck), true)); + + codegen_->RecordPcInfo(instruction->GetDexPc()); +} + +void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } + locations->SetOut(location); + instruction->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitParameterValue(HParameterValue* instruction) { + // Nothing to do, the parameter is already at its location. 
+} + +void LocationsBuilderX86_64::VisitNot(HNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetInAt(0, X86_64CpuLocation(RAX)); + locations->SetOut(X86_64CpuLocation(RAX)); + instruction->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitNot(HNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), + locations->Out().AsX86_64().AsCpuRegister().AsRegister()); + __ xorq(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1)); +} + +void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); + instruction->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) { + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction) { + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) { + LOG(FATAL) << "Unimplemented"; +} + +} // namespace x86_64 +} // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h new file mode 100644 index 0000000000..ac7ee9ffc4 --- /dev/null +++ b/compiler/optimizing/code_generator_x86_64.h @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
+
+#include "code_generator.h"
+#include "nodes.h"
+#include "utils/x86_64/assembler_x86_64.h"
+
+namespace art {
+namespace x86_64 {
+
+static constexpr size_t kX86_64WordSize = 8;
+
+static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
+
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+class InvokeDexCallingConvention : public CallingConvention<Register> {
+ public:
+  InvokeDexCallingConvention()
+      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitor {
+ public:
+  InvokeDexCallingConventionVisitor() : gp_index_(0), stack_index_(0) {}
+
+  Location GetNextLocation(Primitive::Type type);
+
+ private:
+  InvokeDexCallingConvention calling_convention;
+  uint32_t gp_index_;
+  uint32_t stack_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
+class CodeGeneratorX86_64;
+
+class LocationsBuilderX86_64 : public HGraphVisitor {
+ public:
+  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
+      : HGraphVisitor(graph), codegen_(codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name) \
+  virtual void Visit##name(H##name* instr);
+
+  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  CodeGeneratorX86_64* const codegen_;
+  InvokeDexCallingConventionVisitor parameter_visitor_;
+
+  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
+};
+
+class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
+ public:
+  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
+
+#define DECLARE_VISIT_INSTRUCTION(name) \
+  virtual void Visit##name(H##name* instr);
+
+  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+  void LoadCurrentMethod(CpuRegister reg);
+
+  X86_64Assembler* GetAssembler() const { return assembler_; }
+
+ private:
+  X86_64Assembler* const assembler_;
+  CodeGeneratorX86_64* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
+};
+
+class CodeGeneratorX86_64 : public CodeGenerator {
+ public:
+  explicit CodeGeneratorX86_64(HGraph* graph);
+  virtual ~CodeGeneratorX86_64() {}
+
+  virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE;
+  virtual void GenerateFrameEntry() OVERRIDE;
+  virtual void GenerateFrameExit() OVERRIDE;
+  virtual void Bind(Label* label) OVERRIDE;
+  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+
+  virtual size_t GetWordSize() const OVERRIDE {
+    return kX86_64WordSize;
+  }
+
+  virtual HGraphVisitor* GetLocationBuilder() OVERRIDE {
+    return &location_builder_;
+  }
+
+  virtual HGraphVisitor* GetInstructionVisitor() OVERRIDE {
+    return &instruction_visitor_;
+  }
+
+  virtual X86_64Assembler* GetAssembler() OVERRIDE {
+    return &assembler_;
+  }
+
+  int32_t GetStackSlot(HLocal* local) const;
+  virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+
+  virtual size_t GetNumberOfRegisters() const OVERRIDE {
+    return kNumberOfRegIds;
+  }
+
+  virtual size_t GetNumberOfCoreRegisters() const OVERRIDE {
+    return kNumberOfCpuRegisters;
+  }
+
+  virtual size_t GetNumberOfFloatingPointRegisters() const OVERRIDE {
+    return kNumberOfFloatRegisters;
+  }
+
+  virtual void SetupBlockedRegisters(bool* blocked_registers) const OVERRIDE;
+  virtual ManagedRegister AllocateFreeRegister(
+      Primitive::Type type, bool* blocked_registers) const OVERRIDE;
+  virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
+  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+
+ private:
+  // Helper method to move a value between two locations.
+  void Move(Location destination, Location source);
+
+  LocationsBuilderX86_64 location_builder_;
+  InstructionCodeGeneratorX86_64 instruction_visitor_;
+  X86_64Assembler assembler_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 8ee775cbe1..c3baf1a7b7 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -47,6 +47,17 @@ class InternalCodeAllocator : public CodeAllocator {
   DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
 };
 
+#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+static void Run(const InternalCodeAllocator& allocator, bool has_result, int32_t expected) {
+  typedef int32_t (*fptr)();
+  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
+  int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())();
+  if (has_result) {
+    CHECK_EQ(result, expected);
+  }
+}
+#endif
+
 static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
@@ -55,24 +66,23 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe
   HGraph* graph = builder.BuildGraph(*item);
   ASSERT_NE(graph, nullptr);
   InternalCodeAllocator allocator;
+
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, kX86);
   codegen->CompileBaseline(&allocator);
-  typedef int32_t (*fptr)();
 #if defined(__i386__)
-  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
-  int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())();
-  if (has_result) {
-    CHECK_EQ(result, expected);
-  }
+  Run(allocator, has_result, expected);
 #endif
+
   codegen = CodeGenerator::Create(&arena, graph, kArm);
   codegen->CompileBaseline(&allocator);
 #if defined(__arm__)
-  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
-  int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())();
-  if (has_result) {
-    CHECK_EQ(result, expected);
-  }
+  Run(allocator, has_result, expected);
+#endif
+
+  codegen = CodeGenerator::Create(&arena, graph, kX86_64);
+  codegen->CompileBaseline(&allocator);
+#if defined(__x86_64__)
+  Run(allocator, has_result, expected);
 #endif
 }
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 3c60d3cbe8..40a39ad80d 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -150,6 +150,7 @@ class Location : public ValueObject {
 
   arm::ArmManagedRegister AsArm() const;
   x86::X86ManagedRegister AsX86() const;
+  x86_64::X86_64ManagedRegister AsX86_64() const;
 
   Kind GetKind() const {
     return KindField::Decode(value_);
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 4a1b6ce446..cadd3c54d6 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -163,7 +163,11 @@ bool ParallelMoveResolver::IsScratchLocation(Location loc) {
   return false;
 }
 
-int ParallelMoveResolver::AllocateScratchRegister(int blocked, int register_count, bool* spilled) {
+int ParallelMoveResolver::AllocateScratchRegister(int blocked,
+                                                  int register_count,
+                                                  int if_scratch,
+                                                  bool* spilled) {
+  DCHECK_NE(blocked, if_scratch);
   int scratch = -1;
   for (int reg = 0; reg < register_count; ++reg) {
     if ((blocked != reg) &&
@@ -175,11 +179,7 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, int register_coun
 
   if (scratch == -1) {
     *spilled = true;
-    for (int reg = 0; reg < register_count; ++reg) {
-      if (blocked != reg) {
-        scratch = reg;
-      }
-    }
+    scratch = if_scratch;
   } else {
     *spilled = false;
   }
@@ -189,11 +189,11 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, int register_coun
 
 
 ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
-    ParallelMoveResolver* resolver, int blocked, int number_of_registers)
+    ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
     : resolver_(resolver),
       reg_(kNoRegister),
       spilled_(false) {
-  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, &spilled_);
+  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);
 
   if (spilled_) {
     resolver->SpillScratch(reg_);
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index e1189d8520..fcc1de6dc9 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,7 +42,10 @@ class ParallelMoveResolver : public ValueObject {
  protected:
   class ScratchRegisterScope : public ValueObject {
    public:
-    ScratchRegisterScope(ParallelMoveResolver* resolver, int blocked, int number_of_registers);
+    ScratchRegisterScope(ParallelMoveResolver* resolver,
+                         int blocked,
+                         int if_scratch,
+                         int number_of_registers);
     ~ScratchRegisterScope();
 
     int GetRegister() const { return reg_; }
@@ -55,7 +58,7 @@ class ParallelMoveResolver : public ValueObject {
   };
 
   bool IsScratchLocation(Location loc);
-  int AllocateScratchRegister(int blocked, int register_count, bool* spilled);
+  int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
 
   // Emit a move.
   virtual void EmitMove(size_t index) = 0;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index c2a47697de..348e9d4921 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -651,7 +651,9 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position,
     // Move must happen after the instruction.
     DCHECK(!at->IsControlFlow());
     move = at->GetNext()->AsParallelMove();
-    if (move == nullptr || IsInputMove(move)) {
+    // This is a parallel move for connecting siblings in the same block. We need to
+    // differentiate it from moves for connecting blocks, and from input moves.
+    if (move == nullptr || move->GetLifetimePosition() != position) {
       move = new (allocator_) HParallelMove(allocator_);
       move->SetLifetimePosition(position);
       at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
@@ -660,7 +662,9 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position,
     // Move must happen before the instruction.
     HInstruction* previous = at->GetPrevious();
     if (previous != nullptr && previous->AsParallelMove() != nullptr) {
-      if (IsInputMove(previous)) {
+      // This is a parallel move for connecting siblings in the same block. We need to
+      // differentiate it from moves for connecting blocks, and from input moves.
+      if (previous->GetLifetimePosition() != position) {
         previous = previous->GetPrevious();
       }
     }
@@ -684,8 +688,12 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
   HInstruction* last = block->GetLastInstruction();
   HInstruction* previous = last->GetPrevious();
   HParallelMove* move;
-  if (previous == nullptr || previous->AsParallelMove() == nullptr) {
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from output moves.
+  if (previous == nullptr || previous->AsParallelMove() == nullptr
+      || previous->AsParallelMove()->GetLifetimePosition() != block->GetLifetimeEnd()) {
     move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(block->GetLifetimeEnd());
     block->InsertInstructionBefore(move, last);
   } else {
     move = previous->AsParallelMove();
@@ -700,7 +708,9 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
 
   HInstruction* first = block->GetFirstInstruction();
   HParallelMove* move = first->AsParallelMove();
-  if (move == nullptr || IsInputMove(move)) {
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from input moves.
+  if (move == nullptr || move->GetLifetimePosition() != block->GetLifetimeStart()) {
     move = new (allocator_) HParallelMove(allocator_);
     move->SetLifetimePosition(block->GetLifetimeStart());
     block->InsertInstructionBefore(move, first);
@@ -718,9 +728,14 @@ void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
     return;
   }
 
+  size_t position = instruction->GetLifetimePosition() + 1;
   HParallelMove* move = instruction->GetNext()->AsParallelMove();
-  if (move == nullptr || IsInputMove(move)) {
+  // This is a parallel move for moving the output of an instruction. We need
+  // to differentiate it from input moves, from moves for connecting siblings in
+  // the same block, and from moves for connecting blocks.
+  if (move == nullptr || move->GetLifetimePosition() != position) {
     move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
     instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
   }
   move->AddMove(new (allocator_) MoveOperands(source, destination));
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 1b5585f36c..8b7c4f1ff1 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -65,7 +65,7 @@ class RegisterAllocator {
 
   static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
   static bool Supports(InstructionSet instruction_set) {
-    return instruction_set == kX86;
+    return instruction_set == kX86 || instruction_set == kArm;
   }
 
   size_t GetNumberOfSpillSlots() const {
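
The register_allocator.cc hunks above stop relying on IsInputMove() and instead tag every HParallelMove with a lifetime position, reusing an existing move only when its position matches the one being inserted. A minimal standalone sketch of that reuse-or-create pattern (simplified types; Move and GetOrCreateMoveAt are hypothetical names, not the ART classes):

  #include <cstddef>
  #include <memory>
  #include <vector>

  // Stand-in for HParallelMove: only the lifetime position matters for this sketch.
  struct Move {
    explicit Move(size_t position) : lifetime_position(position) {}
    size_t lifetime_position;
  };

  // Reuse the move already created for `position`; otherwise create a new one.
  // Distinct positions keep input moves, sibling-connecting moves and
  // block-connecting moves in separate parallel moves.
  Move* GetOrCreateMoveAt(std::vector<std::unique_ptr<Move>>* moves, size_t position) {
    if (!moves->empty() && moves->back()->lifetime_position == position) {
      return moves->back().get();
    }
    moves->push_back(std::make_unique<Move>(position));
    return moves->back().get();
  }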