diff options
Diffstat (limited to 'compiler/optimizing')
38 files changed, 2593 insertions, 1107 deletions
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index be8631ad42..777a117a3b 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -305,6 +305,24 @@ void HGraphBuilder::Binop_23x(const Instruction& instruction, } template<typename T> +void HGraphBuilder::Binop_23x_shift(const Instruction& instruction, + Primitive::Type type) { + HInstruction* first = LoadLocal(instruction.VRegB(), type); + HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt); + current_block_->AddInstruction(new (arena_) T(type, first, second)); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); +} + +void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction, + Primitive::Type type, + HCompare::Bias bias) { + HInstruction* first = LoadLocal(instruction.VRegB(), type); + HInstruction* second = LoadLocal(instruction.VRegC(), type); + current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias)); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); +} + +template<typename T> void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type) { HInstruction* first = LoadLocal(instruction.VRegA(), type); HInstruction* second = LoadLocal(instruction.VRegB(), type); @@ -313,6 +331,14 @@ void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type ty } template<typename T> +void HGraphBuilder::Binop_12x_shift(const Instruction& instruction, Primitive::Type type) { + HInstruction* first = LoadLocal(instruction.VRegA(), type); + HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); + current_block_->AddInstruction(new (arena_) T(type, first, second)); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); +} + +template<typename T> void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc) { @@ -1017,6 +1043,16 @@ bool 
HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::LONG_TO_FLOAT: { + Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat); + break; + } + + case Instruction::LONG_TO_DOUBLE: { + Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble); + break; + } + case Instruction::INT_TO_BYTE: { Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte); break; @@ -1141,6 +1177,36 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::SHL_INT: { + Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt); + break; + } + + case Instruction::SHL_LONG: { + Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong); + break; + } + + case Instruction::SHR_INT: { + Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt); + break; + } + + case Instruction::SHR_LONG: { + Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong); + break; + } + + case Instruction::USHR_INT: { + Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt); + break; + } + + case Instruction::USHR_LONG: { + Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong); + break; + } + case Instruction::OR_INT: { Binop_23x<HOr>(instruction, Primitive::kPrimInt); break; @@ -1240,6 +1306,36 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::SHL_INT_2ADDR: { + Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt); + break; + } + + case Instruction::SHL_LONG_2ADDR: { + Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong); + break; + } + + case Instruction::SHR_INT_2ADDR: { + Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt); + break; + } + + case Instruction::SHR_LONG_2ADDR: { + Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong); + break; + } + + case Instruction::USHR_INT_2ADDR: { + Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt); + break; + } + + case 
Instruction::USHR_LONG_2ADDR: { + Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong); + break; + } + case Instruction::DIV_FLOAT_2ADDR: { Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc); break; @@ -1354,6 +1450,21 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::SHL_INT_LIT8: { + Binop_22b<HShl>(instruction, false); + break; + } + + case Instruction::SHR_INT_LIT8: { + Binop_22b<HShr>(instruction, false); + break; + } + + case Instruction::USHR_INT_LIT8: { + Binop_22b<HUShr>(instruction, false); + break; + } + case Instruction::NEW_INSTANCE: { current_block_->AddInstruction( new (arena_) HNewInstance(dex_pc, instruction.VRegB_21c())); @@ -1400,7 +1511,27 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; case Instruction::CMP_LONG: { - Binop_23x<HCompare>(instruction, Primitive::kPrimLong); + Binop_23x_cmp(instruction, Primitive::kPrimLong, HCompare::kNoBias); + break; + } + + case Instruction::CMPG_FLOAT: { + Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kGtBias); + break; + } + + case Instruction::CMPG_DOUBLE: { + Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kGtBias); + break; + } + + case Instruction::CMPL_FLOAT: { + Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kLtBias); + break; + } + + case Instruction::CMPL_DOUBLE: { + Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kLtBias); break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 897bcece7b..25781b08f4 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -105,12 +105,20 @@ class HGraphBuilder : public ValueObject { void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); template<typename T> + void Binop_23x_shift(const Instruction& instruction, Primitive::Type type); + + void Binop_23x_cmp(const Instruction& instruction, Primitive::Type 
type, HCompare::Bias bias); + + template<typename T> void Binop_12x(const Instruction& instruction, Primitive::Type type); template<typename T> void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); template<typename T> + void Binop_12x_shift(const Instruction& instruction, Primitive::Type type); + + template<typename T> void Binop_22b(const Instruction& instruction, bool reverse); template<typename T> diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 4d71cb780a..e581af22aa 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -71,11 +71,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { } } GenerateSlowPaths(); - - size_t code_size = GetAssembler()->CodeSize(); - uint8_t* buffer = allocator->Allocate(code_size); - MemoryRegion code(buffer, code_size); - GetAssembler()->FinalizeInstructions(code); + Finalize(allocator); } void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { @@ -97,9 +93,13 @@ void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { } } GenerateSlowPaths(); + Finalize(allocator); +} +void CodeGenerator::Finalize(CodeAllocator* allocator) { size_t code_size = GetAssembler()->CodeSize(); uint8_t* buffer = allocator->Allocate(code_size); + MemoryRegion code(buffer, code_size); GetAssembler()->FinalizeInstructions(code); } @@ -228,7 +228,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { DCHECK(!blocked_fpu_registers_[loc.reg()]); blocked_fpu_registers_[loc.reg()] = true; } else { - DCHECK_EQ(loc.GetPolicy(), Location::kRequiresRegister); + DCHECK(loc.GetPolicy() == Location::kRequiresRegister + || loc.GetPolicy() == Location::kRequiresFpuRegister); } } @@ -259,10 +260,21 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) { Location loc = 
locations->GetTemp(i); if (loc.IsUnallocated()) { - DCHECK_EQ(loc.GetPolicy(), Location::kRequiresRegister); - // TODO: Adjust handling of temps. We currently consider temps to use - // core registers. They may also use floating point registers at some point. - loc = AllocateFreeRegister(Primitive::kPrimInt); + switch (loc.GetPolicy()) { + case Location::kRequiresRegister: + // Allocate a core register (large enough to fit a 32-bit integer). + loc = AllocateFreeRegister(Primitive::kPrimInt); + break; + + case Location::kRequiresFpuRegister: + // Allocate an FPU register (large enough to fit a 64-bit double). + loc = AllocateFreeRegister(Primitive::kPrimDouble); + break; + + default: + LOG(FATAL) << "Unexpected policy for temporary location " + << loc.GetPolicy(); + } locations->SetTempAt(i, loc); } } @@ -589,12 +601,14 @@ void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) { if (locations->RegisterContainsObject(i)) { locations->SetStackBit(stack_offset / kVRegSize); } + DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); stack_offset += SaveCoreRegister(stack_offset, i); } } for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (register_set->ContainsFloatingPointRegister(i)) { + DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); stack_offset += SaveFloatingPointRegister(stack_offset, i); } } @@ -605,12 +619,14 @@ void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) { size_t stack_offset = first_register_slot_in_slow_path_; for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { if (register_set->ContainsCoreRegister(i)) { + DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); stack_offset += RestoreCoreRegister(stack_offset, i); } } for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { if (register_set->ContainsFloatingPointRegister(i)) { + DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize()); stack_offset += 
RestoreFloatingPointRegister(stack_offset, i); } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index f906eb8c05..4c0d3ea960 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -30,6 +30,11 @@ namespace art { static size_t constexpr kVRegSize = 4; static size_t constexpr kUninitializedFrameSize = 0; +// Binary encoding of 2^32 for type double. +static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); +// Binary encoding of 2^31 for type double. +static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000); + class Assembler; class CodeGenerator; class DexCompilationUnit; @@ -85,6 +90,7 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { } virtual void Initialize() = 0; + virtual void Finalize(CodeAllocator* allocator); virtual void GenerateFrameEntry() = 0; virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index dc0a829f65..5b2be2e9a1 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -495,7 +495,8 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene codegen_(codegen) {} void CodeGeneratorARM::GenerateFrameEntry() { - bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); + bool skip_overflow_check = + IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); if (!skip_overflow_check) { if (kExplicitStackOverflowCheck) { SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM(); @@ -655,26 +656,26 @@ void CodeGeneratorARM::Move32(Location destination, Location source) { } if (destination.IsRegister()) { if (source.IsRegister()) { - __ Mov(destination.As<Register>(), source.As<Register>()); + __ 
Mov(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsFpuRegister()) { - __ vmovrs(destination.As<Register>(), source.As<SRegister>()); + __ vmovrs(destination.AsRegister<Register>(), source.AsFpuRegister<SRegister>()); } else { - __ LoadFromOffset(kLoadWord, destination.As<Register>(), SP, source.GetStackIndex()); + __ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(), SP, source.GetStackIndex()); } } else if (destination.IsFpuRegister()) { if (source.IsRegister()) { - __ vmovsr(destination.As<SRegister>(), source.As<Register>()); + __ vmovsr(destination.AsFpuRegister<SRegister>(), source.AsRegister<Register>()); } else if (source.IsFpuRegister()) { - __ vmovs(destination.As<SRegister>(), source.As<SRegister>()); + __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>()); } else { - __ LoadSFromOffset(destination.As<SRegister>(), SP, source.GetStackIndex()); + __ LoadSFromOffset(destination.AsFpuRegister<SRegister>(), SP, source.GetStackIndex()); } } else { DCHECK(destination.IsStackSlot()) << destination; if (source.IsRegister()) { - __ StoreToOffset(kStoreWord, source.As<Register>(), SP, destination.GetStackIndex()); + __ StoreToOffset(kStoreWord, source.AsRegister<Register>(), SP, destination.GetStackIndex()); } else if (source.IsFpuRegister()) { - __ StoreSToOffset(source.As<SRegister>(), SP, destination.GetStackIndex()); + __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex()); } else { DCHECK(source.IsStackSlot()) << source; __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); @@ -689,19 +690,25 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { } if (destination.IsRegisterPair()) { if (source.IsRegisterPair()) { - __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>()); - __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>()); + EmitParallelMoves( + 
Location::RegisterLocation(source.AsRegisterPairHigh<Register>()), + Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()), + Location::RegisterLocation(source.AsRegisterPairLow<Register>()), + Location::RegisterLocation(destination.AsRegisterPairLow<Register>())); } else if (source.IsFpuRegister()) { UNIMPLEMENTED(FATAL); } else if (source.IsQuickParameter()) { uint16_t register_index = source.GetQuickParameterRegisterIndex(); uint16_t stack_index = source.GetQuickParameterStackIndex(); InvokeDexCallingConvention calling_convention; - __ Mov(destination.AsRegisterPairLow<Register>(), - calling_convention.GetRegisterAt(register_index)); - __ LoadFromOffset(kLoadWord, destination.AsRegisterPairHigh<Register>(), - SP, calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()); + EmitParallelMoves( + Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)), + Location::RegisterLocation(destination.AsRegisterPairLow<Register>()), + Location::StackSlot( + calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()), + Location::RegisterLocation(destination.AsRegisterPairHigh<Register>())); } else { + // No conflict possible, so just do the moves. 
DCHECK(source.IsDoubleStackSlot()); if (destination.AsRegisterPairLow<Register>() == R1) { DCHECK_EQ(destination.AsRegisterPairHigh<Register>(), R2); @@ -725,22 +732,21 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { uint16_t register_index = destination.GetQuickParameterRegisterIndex(); uint16_t stack_index = destination.GetQuickParameterStackIndex(); if (source.IsRegisterPair()) { - __ Mov(calling_convention.GetRegisterAt(register_index), - source.AsRegisterPairLow<Register>()); - __ StoreToOffset(kStoreWord, source.AsRegisterPairHigh<Register>(), - SP, calling_convention.GetStackOffsetOf(stack_index + 1)); + UNIMPLEMENTED(FATAL); } else if (source.IsFpuRegister()) { UNIMPLEMENTED(FATAL); } else { DCHECK(source.IsDoubleStackSlot()); - __ LoadFromOffset( - kLoadWord, calling_convention.GetRegisterAt(register_index), SP, source.GetStackIndex()); - __ LoadFromOffset(kLoadWord, R0, SP, source.GetHighStackIndex(kArmWordSize)); - __ StoreToOffset(kStoreWord, R0, SP, calling_convention.GetStackOffsetOf(stack_index + 1)); + EmitParallelMoves( + Location::StackSlot(source.GetStackIndex()), + Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)), + Location::StackSlot(source.GetHighStackIndex(kArmWordSize)), + Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1))); } } else { DCHECK(destination.IsDoubleStackSlot()); if (source.IsRegisterPair()) { + // No conflict possible, so just do the moves. if (source.AsRegisterPairLow<Register>() == R1) { DCHECK_EQ(source.AsRegisterPairHigh<Register>(), R2); __ StoreToOffset(kStoreWord, R1, SP, destination.GetStackIndex()); @@ -753,21 +759,24 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { InvokeDexCallingConvention calling_convention; uint16_t register_index = source.GetQuickParameterRegisterIndex(); uint16_t stack_index = source.GetQuickParameterStackIndex(); + // Just move the low part. 
The only time a source is a quick parameter is + // when moving the parameter to its stack locations. And the (Java) caller + // of this method has already done that. __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(register_index), - SP, destination.GetStackIndex()); - __ LoadFromOffset(kLoadWord, R0, - SP, calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()); - __ StoreToOffset(kStoreWord, R0, SP, destination.GetHighStackIndex(kArmWordSize)); + SP, destination.GetStackIndex()); + DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(), + static_cast<size_t>(destination.GetHighStackIndex(kArmWordSize))); } else if (source.IsFpuRegisterPair()) { __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()), SP, destination.GetStackIndex()); } else { DCHECK(source.IsDoubleStackSlot()); - __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); - __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + EmitParallelMoves( + Location::StackSlot(source.GetStackIndex()), + Location::StackSlot(destination.GetStackIndex()), + Location::StackSlot(source.GetHighStackIndex(kArmWordSize)), + Location::StackSlot(destination.GetHighStackIndex(kArmWordSize))); } } } @@ -783,7 +792,7 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr if (const_to_move->IsIntConstant()) { int32_t value = const_to_move->AsIntConstant()->GetValue(); if (location.IsRegister()) { - __ LoadImmediate(location.As<Register>(), value); + __ LoadImmediate(location.AsRegister<Register>(), value); } else { DCHECK(location.IsStackSlot()); __ LoadImmediate(IP, value); @@ -933,27 +942,27 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { // Condition has 
been materialized, compare the output to 0 DCHECK(if_instr->GetLocations()->InAt(0).IsRegister()); - __ cmp(if_instr->GetLocations()->InAt(0).As<Register>(), + __ cmp(if_instr->GetLocations()->InAt(0).AsRegister<Register>(), ShifterOperand(0)); __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE); } else { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. LocationSummary* locations = cond->GetLocations(); + Register left = locations->InAt(0).AsRegister<Register>(); if (locations->InAt(1).IsRegister()) { - __ cmp(locations->InAt(0).As<Register>(), - ShifterOperand(locations->InAt(1).As<Register>())); + __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); } else { DCHECK(locations->InAt(1).IsConstant()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ShifterOperand operand; - if (ShifterOperand::CanHoldArm(value, &operand)) { - __ cmp(locations->InAt(0).As<Register>(), ShifterOperand(value)); + if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) { + __ cmp(left, operand); } else { Register temp = IP; __ LoadImmediate(temp, value); - __ cmp(locations->InAt(0).As<Register>(), ShifterOperand(temp)); + __ cmp(left, ShifterOperand(temp)); } } __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), @@ -979,27 +988,27 @@ void LocationsBuilderARM::VisitCondition(HCondition* comp) { void InstructionCodeGeneratorARM::VisitCondition(HCondition* comp) { if (!comp->NeedsMaterialization()) return; - LocationSummary* locations = comp->GetLocations(); + Register left = locations->InAt(0).AsRegister<Register>(); + if (locations->InAt(1).IsRegister()) { - __ cmp(locations->InAt(0).As<Register>(), - ShifterOperand(locations->InAt(1).As<Register>())); + __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); } else { DCHECK(locations->InAt(1).IsConstant()); int32_t value = 
locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ShifterOperand operand; - if (ShifterOperand::CanHoldArm(value, &operand)) { - __ cmp(locations->InAt(0).As<Register>(), ShifterOperand(value)); + if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) { + __ cmp(left, operand); } else { Register temp = IP; __ LoadImmediate(temp, value); - __ cmp(locations->InAt(0).As<Register>(), ShifterOperand(temp)); + __ cmp(left, ShifterOperand(temp)); } } __ it(ARMCondition(comp->GetCondition()), kItElse); - __ mov(locations->Out().As<Register>(), ShifterOperand(1), + __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1), ARMCondition(comp->GetCondition())); - __ mov(locations->Out().As<Register>(), ShifterOperand(0), + __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0), ARMOppositeCondition(comp->GetCondition())); } @@ -1169,7 +1178,7 @@ void CodeGeneratorARM::LoadCurrentMethod(Register reg) { } void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) { - Register temp = invoke->GetLocations()->GetTemp(0).As<Register>(); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); // TODO: Implement all kinds of calls: // 1) boot -> boot @@ -1189,7 +1198,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) { // LR = temp[offset_of_quick_compiled_code] __ LoadFromOffset(kLoadWord, LR, temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmPointerSize).Int32Value()); + kArmWordSize).Int32Value()); // LR() __ blx(LR); @@ -1216,7 +1225,7 @@ void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { - Register temp = invoke->GetLocations()->GetTemp(0).As<Register>(); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + invoke->GetVTableIndex() * 
sizeof(mirror::Class::VTableEntry); LocationSummary* locations = invoke->GetLocations(); @@ -1227,11 +1236,11 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); __ LoadFromOffset(kLoadWord, temp, temp, class_offset); } else { - __ LoadFromOffset(kLoadWord, temp, receiver.As<Register>(), class_offset); + __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmPointerSize).Int32Value(); + kArmWordSize).Int32Value(); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, LR, temp, entry_point); @@ -1249,7 +1258,7 @@ void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - Register temp = invoke->GetLocations()->GetTemp(0).As<Register>(); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() + (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry); LocationSummary* locations = invoke->GetLocations(); @@ -1257,18 +1266,19 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); // Set the hidden argument. 
- __ LoadImmediate(invoke->GetLocations()->GetTemp(1).As<Register>(), invoke->GetDexMethodIndex()); + __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), + invoke->GetDexMethodIndex()); // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); __ LoadFromOffset(kLoadWord, temp, temp, class_offset); } else { - __ LoadFromOffset(kLoadWord, temp, receiver.As<Register>(), class_offset); + __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } // temp = temp->GetImtEntryAt(method_offset); uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmPointerSize).Int32Value(); + kArmWordSize).Int32Value(); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, LR, temp, entry_point); @@ -1308,7 +1318,7 @@ void InstructionCodeGeneratorARM::VisitNeg(HNeg* neg) { switch (neg->GetResultType()) { case Primitive::kPrimInt: DCHECK(in.IsRegister()); - __ rsb(out.As<Register>(), in.As<Register>(), ShifterOperand(0)); + __ rsb(out.AsRegister<Register>(), in.AsRegister<Register>(), ShifterOperand(0)); break; case Primitive::kPrimLong: @@ -1334,7 +1344,7 @@ void InstructionCodeGeneratorARM::VisitNeg(HNeg* neg) { case Primitive::kPrimFloat: DCHECK(in.IsFpuRegister()); - __ vnegs(out.As<SRegister>(), in.As<SRegister>()); + __ vnegs(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>()); break; case Primitive::kPrimDouble: @@ -1353,6 +1363,7 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); + DCHECK_NE(result_type, input_type); switch (result_type) { case Primitive::kPrimByte: switch (input_type) { @@ -1434,7 +1445,6 @@ void 
LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: - case Primitive::kPrimChar: // Processing a Dex `int-to-char' instruction. locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -1458,6 +1468,15 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimLong: + // Processing a Dex `long-to-float' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimDouble: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1481,6 +1500,14 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimLong: + // Processing a Dex `long-to-double' instruction. 
+ locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1504,6 +1531,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio Location in = locations->InAt(0); Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); + DCHECK_NE(result_type, input_type); switch (result_type) { case Primitive::kPrimByte: switch (input_type) { @@ -1511,7 +1539,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-byte' instruction. - __ sbfx(out.As<Register>(), in.As<Register>(), 0, 8); + __ sbfx(out.AsRegister<Register>(), in.AsRegister<Register>(), 0, 8); break; default: @@ -1526,7 +1554,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimInt: case Primitive::kPrimChar: // Processing a Dex `int-to-short' instruction. - __ sbfx(out.As<Register>(), in.As<Register>(), 0, 16); + __ sbfx(out.AsRegister<Register>(), in.AsRegister<Register>(), 0, 16); break; default: @@ -1541,14 +1569,14 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio // Processing a Dex `long-to-int' instruction. 
DCHECK(out.IsRegister()); if (in.IsRegisterPair()) { - __ Mov(out.As<Register>(), in.AsRegisterPairLow<Register>()); + __ Mov(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); } else if (in.IsDoubleStackSlot()) { - __ LoadFromOffset(kLoadWord, out.As<Register>(), SP, in.GetStackIndex()); + __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), SP, in.GetStackIndex()); } else { DCHECK(in.IsConstant()); DCHECK(in.GetConstant()->IsLongConstant()); int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); - __ LoadImmediate(out.As<Register>(), static_cast<int32_t>(value)); + __ LoadImmediate(out.AsRegister<Register>(), static_cast<int32_t>(value)); } break; @@ -1573,7 +1601,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio // Processing a Dex `int-to-long' instruction. DCHECK(out.IsRegisterPair()); DCHECK(in.IsRegister()); - __ Mov(out.AsRegisterPairLow<Register>(), in.As<Register>()); + __ Mov(out.AsRegisterPairLow<Register>(), in.AsRegister<Register>()); // Sign extension. __ Asr(out.AsRegisterPairHigh<Register>(), out.AsRegisterPairLow<Register>(), @@ -1597,9 +1625,8 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: - case Primitive::kPrimChar: // Processing a Dex `int-to-char' instruction. - __ ubfx(out.As<Register>(), in.As<Register>(), 0, 16); + __ ubfx(out.AsRegister<Register>(), in.AsRegister<Register>(), 0, 16); break; default: @@ -1615,12 +1642,53 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimInt: case Primitive::kPrimChar: { // Processing a Dex `int-to-float' instruction. 
- __ vmovsr(out.As<SRegister>(), in.As<Register>()); - __ vcvtsi(out.As<SRegister>(), out.As<SRegister>()); + __ vmovsr(out.AsFpuRegister<SRegister>(), in.AsRegister<Register>()); + __ vcvtsi(out.AsFpuRegister<SRegister>(), out.AsFpuRegister<SRegister>()); + break; + } + + case Primitive::kPrimLong: { + // Processing a Dex `long-to-float' instruction. + Register low = in.AsRegisterPairLow<Register>(); + Register high = in.AsRegisterPairHigh<Register>(); + SRegister output = out.AsFpuRegister<SRegister>(); + Register constant_low = locations->GetTemp(0).AsRegister<Register>(); + Register constant_high = locations->GetTemp(1).AsRegister<Register>(); + SRegister temp1_s = locations->GetTemp(2).AsFpuRegisterPairLow<SRegister>(); + DRegister temp1_d = FromLowSToD(temp1_s); + SRegister temp2_s = locations->GetTemp(3).AsFpuRegisterPairLow<SRegister>(); + DRegister temp2_d = FromLowSToD(temp2_s); + + // Operations use doubles for precision reasons (each 32-bit + // half of a long fits in the 53-bit mantissa of a double, + // but not in the 24-bit mantissa of a float). This is + // especially important for the low bits. The result is + // eventually converted to float. + + // temp1_d = int-to-double(high) + __ vmovsr(temp1_s, high); + __ vcvtdi(temp1_d, temp1_s); + // Using vmovd to load the `k2Pow32EncodingForDouble` constant + // as an immediate value into `temp2_d` does not work, as + // this instruction only transfers 8 significant bits of its + // immediate operand. Instead, use two 32-bit core + // registers to load `k2Pow32EncodingForDouble` into + // `temp2_d`. 
+ __ LoadImmediate(constant_low, Low32Bits(k2Pow32EncodingForDouble)); + __ LoadImmediate(constant_high, High32Bits(k2Pow32EncodingForDouble)); + __ vmovdrr(temp2_d, constant_low, constant_high); + // temp1_d = temp1_d * 2^32 + __ vmuld(temp1_d, temp1_d, temp2_d); + // temp2_d = unsigned-to-double(low) + __ vmovsr(temp2_s, low); + __ vcvtdu(temp2_d, temp2_s); + // temp1_d = temp1_d + temp2_d + __ vaddd(temp1_d, temp1_d, temp2_d); + // output = double-to-float(temp1_d); + __ vcvtsd(output, temp1_d); break; } - case Primitive::kPrimLong: case Primitive::kPrimDouble: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1639,13 +1707,44 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimInt: case Primitive::kPrimChar: { // Processing a Dex `int-to-double' instruction. - __ vmovsr(out.AsFpuRegisterPairLow<SRegister>(), in.As<Register>()); + __ vmovsr(out.AsFpuRegisterPairLow<SRegister>(), in.AsRegister<Register>()); __ vcvtdi(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), out.AsFpuRegisterPairLow<SRegister>()); break; } - case Primitive::kPrimLong: + case Primitive::kPrimLong: { + // Processing a Dex `long-to-double' instruction. 
+ Register low = in.AsRegisterPairLow<Register>(); + Register high = in.AsRegisterPairHigh<Register>(); + SRegister out_s = out.AsFpuRegisterPairLow<SRegister>(); + DRegister out_d = FromLowSToD(out_s); + Register constant_low = locations->GetTemp(0).AsRegister<Register>(); + Register constant_high = locations->GetTemp(1).AsRegister<Register>(); + SRegister temp_s = locations->GetTemp(2).AsFpuRegisterPairLow<SRegister>(); + DRegister temp_d = FromLowSToD(temp_s); + + // out_d = int-to-double(high) + __ vmovsr(out_s, high); + __ vcvtdi(out_d, out_s); + // Using vmovd to load the `k2Pow32EncodingForDouble` constant + // as an immediate value into `temp_d` does not work, as + // this instruction only transfers 8 significant bits of its + // immediate operand. Instead, use two 32-bit core + // registers to load `k2Pow32EncodingForDouble` into `temp_d`. + __ LoadImmediate(constant_low, Low32Bits(k2Pow32EncodingForDouble)); + __ LoadImmediate(constant_high, High32Bits(k2Pow32EncodingForDouble)); + __ vmovdrr(temp_d, constant_low, constant_high); + // out_d = out_d * 2^32 + __ vmuld(out_d, out_d, temp_d); + // temp_d = unsigned-to-double(low) + __ vmovsr(temp_s, low); + __ vcvtdu(temp_d, temp_s); + // out_d = out_d + temp_d + __ vaddd(out_d, out_d, temp_d); + break; + } + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1697,10 +1796,12 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { switch (add->GetResultType()) { case Primitive::kPrimInt: if (second.IsRegister()) { - __ add(out.As<Register>(), first.As<Register>(), ShifterOperand(second.As<Register>())); + __ add(out.AsRegister<Register>(), + first.AsRegister<Register>(), + ShifterOperand(second.AsRegister<Register>())); } else { - __ AddConstant(out.As<Register>(), - first.As<Register>(), + __ AddConstant(out.AsRegister<Register>(), + first.AsRegister<Register>(), second.GetConstant()->AsIntConstant()->GetValue()); } 
break; @@ -1715,7 +1816,9 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) { break; case Primitive::kPrimFloat: - __ vadds(out.As<SRegister>(), first.As<SRegister>(), second.As<SRegister>()); + __ vadds(out.AsFpuRegister<SRegister>(), + first.AsFpuRegister<SRegister>(), + second.AsFpuRegister<SRegister>()); break; case Primitive::kPrimDouble: @@ -1761,10 +1864,12 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { switch (sub->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ sub(out.As<Register>(), first.As<Register>(), ShifterOperand(second.As<Register>())); + __ sub(out.AsRegister<Register>(), + first.AsRegister<Register>(), + ShifterOperand(second.AsRegister<Register>())); } else { - __ AddConstant(out.As<Register>(), - first.As<Register>(), + __ AddConstant(out.AsRegister<Register>(), + first.AsRegister<Register>(), -second.GetConstant()->AsIntConstant()->GetValue()); } break; @@ -1781,7 +1886,9 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { } case Primitive::kPrimFloat: { - __ vsubs(out.As<SRegister>(), first.As<SRegister>(), second.As<SRegister>()); + __ vsubs(out.AsFpuRegister<SRegister>(), + first.AsFpuRegister<SRegister>(), + second.AsFpuRegister<SRegister>()); break; } @@ -1830,7 +1937,9 @@ void InstructionCodeGeneratorARM::VisitMul(HMul* mul) { Location second = locations->InAt(1); switch (mul->GetResultType()) { case Primitive::kPrimInt: { - __ mul(out.As<Register>(), first.As<Register>(), second.As<Register>()); + __ mul(out.AsRegister<Register>(), + first.AsRegister<Register>(), + second.AsRegister<Register>()); break; } case Primitive::kPrimLong: { @@ -1865,7 +1974,9 @@ void InstructionCodeGeneratorARM::VisitMul(HMul* mul) { } case Primitive::kPrimFloat: { - __ vmuls(out.As<SRegister>(), first.As<SRegister>(), second.As<SRegister>()); + __ vmuls(out.AsFpuRegister<SRegister>(), + first.AsFpuRegister<SRegister>(), + second.AsFpuRegister<SRegister>()); break; } @@ -1925,7 +2036,9 @@ void 
InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case Primitive::kPrimInt: { - __ sdiv(out.As<Register>(), first.As<Register>(), second.As<Register>()); + __ sdiv(out.AsRegister<Register>(), + first.AsRegister<Register>(), + second.AsRegister<Register>()); break; } @@ -1943,7 +2056,9 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { } case Primitive::kPrimFloat: { - __ vdivs(out.As<SRegister>(), first.As<SRegister>(), second.As<SRegister>()); + __ vdivs(out.AsFpuRegister<SRegister>(), + first.AsFpuRegister<SRegister>(), + second.AsFpuRegister<SRegister>()); break; } @@ -2002,16 +2117,16 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { switch (rem->GetResultType()) { case Primitive::kPrimInt: { - Register reg1 = first.As<Register>(); - Register reg2 = second.As<Register>(); - Register temp = locations->GetTemp(0).As<Register>(); + Register reg1 = first.AsRegister<Register>(); + Register reg2 = second.AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); // temp = reg1 / reg2 (integer division) // temp = temp * reg2 // dest = reg1 - temp __ sdiv(temp, reg1, reg2); __ mul(temp, temp, reg2); - __ sub(out.As<Register>(), reg1, ShifterOperand(temp)); + __ sub(out.AsRegister<Register>(), reg1, ShifterOperand(temp)); break; } @@ -2058,7 +2173,7 @@ void InstructionCodeGeneratorARM::VisitDivZeroCheck(HDivZeroCheck* instruction) switch (instruction->GetType()) { case Primitive::kPrimInt: { if (value.IsRegister()) { - __ cmp(value.As<Register>(), ShifterOperand(0)); + __ cmp(value.AsRegister<Register>(), ShifterOperand(0)); __ b(slow_path->GetEntryLabel(), EQ); } else { DCHECK(value.IsConstant()) << value; @@ -2087,6 +2202,124 @@ void InstructionCodeGeneratorARM::VisitDivZeroCheck(HDivZeroCheck* instruction) } } +void LocationsBuilderARM::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary::CallKind call_kind = op->GetResultType() == 
Primitive::kPrimLong + ? LocationSummary::kCall + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(op, call_kind); + + switch (op->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1))); + locations->SetOut(Location::RequiresRegister()); + break; + } + case Primitive::kPrimLong: { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + // The runtime helper puts the output in R0,R2. + locations->SetOut(Location::RegisterPairLocation(R0, R2)); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); + } +} + +void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary* locations = op->GetLocations(); + Location out = locations->Out(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + + Primitive::Type type = op->GetResultType(); + switch (type) { + case Primitive::kPrimInt: { + Register out_reg = out.AsRegister<Register>(); + Register first_reg = first.AsRegister<Register>(); + // Arm doesn't mask the shift count so we need to do it ourselves. 
+ if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + __ and_(second_reg, second_reg, ShifterOperand(kMaxIntShiftValue)); + if (op->IsShl()) { + __ Lsl(out_reg, first_reg, second_reg); + } else if (op->IsShr()) { + __ Asr(out_reg, first_reg, second_reg); + } else { + __ Lsr(out_reg, first_reg, second_reg); + } + } else { + int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue); + if (shift_value == 0) { // arm does not support shifting with 0 immediate. + __ Mov(out_reg, first_reg); + } else if (op->IsShl()) { + __ Lsl(out_reg, first_reg, shift_value); + } else if (op->IsShr()) { + __ Asr(out_reg, first_reg, shift_value); + } else { + __ Lsr(out_reg, first_reg, shift_value); + } + } + break; + } + case Primitive::kPrimLong: { + // TODO: Inline the assembly instead of calling the runtime. + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>()); + DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>()); + DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegister<Register>()); + DCHECK_EQ(R0, out.AsRegisterPairLow<Register>()); + DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>()); + + int32_t entry_point_offset; + if (op->IsShl()) { + entry_point_offset = QUICK_ENTRY_POINT(pShlLong); + } else if (op->IsShr()) { + entry_point_offset = QUICK_ENTRY_POINT(pShrLong); + } else { + entry_point_offset = QUICK_ENTRY_POINT(pUshrLong); + } + __ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset); + __ blx(LR); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << type; + } +} + +void LocationsBuilderARM::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void InstructionCodeGeneratorARM::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void LocationsBuilderARM::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void 
InstructionCodeGeneratorARM::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void LocationsBuilderARM::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + +void InstructionCodeGeneratorARM::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); @@ -2152,11 +2385,11 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* not_) { Location in = locations->InAt(0); switch (not_->InputAt(0)->GetType()) { case Primitive::kPrimBoolean: - __ eor(out.As<Register>(), in.As<Register>(), ShifterOperand(1)); + __ eor(out.AsRegister<Register>(), in.AsRegister<Register>(), ShifterOperand(1)); break; case Primitive::kPrimInt: - __ mvn(out.As<Register>(), ShifterOperand(in.As<Register>())); + __ mvn(out.AsRegister<Register>(), ShifterOperand(in.AsRegister<Register>())); break; case Primitive::kPrimLong: @@ -2174,44 +2407,72 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* not_) { void LocationsBuilderARM::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + } + default: + LOG(FATAL) << "Unexpected type for compare operation 
" << compare->InputAt(0)->GetType(); + } } void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { LocationSummary* locations = compare->GetLocations(); - switch (compare->InputAt(0)->GetType()) { + Register out = locations->Out().AsRegister<Register>(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + + Label less, greater, done; + Primitive::Type type = compare->InputAt(0)->GetType(); + switch (type) { case Primitive::kPrimLong: { - Register output = locations->Out().As<Register>(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - Label less, greater, done; __ cmp(left.AsRegisterPairHigh<Register>(), ShifterOperand(right.AsRegisterPairHigh<Register>())); // Signed compare. __ b(&less, LT); __ b(&greater, GT); - // Do LoadImmediate before any `cmp`, as LoadImmediate might affect - // the status flags. - __ LoadImmediate(output, 0); + // Do LoadImmediate before any `cmp`, as LoadImmediate might affect the status flags. + __ LoadImmediate(out, 0); __ cmp(left.AsRegisterPairLow<Register>(), ShifterOperand(right.AsRegisterPairLow<Register>())); // Unsigned compare. - __ b(&done, EQ); - __ b(&less, CC); - - __ Bind(&greater); - __ LoadImmediate(output, 1); - __ b(&done); - - __ Bind(&less); - __ LoadImmediate(output, -1); - - __ Bind(&done); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + __ LoadImmediate(out, 0); + if (type == Primitive::kPrimFloat) { + __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>()); + } else { + __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(right.AsFpuRegisterPairLow<SRegister>())); + } + __ vmstat(); // transfer FP status register to ARM APSR. + __ b(compare->IsGtBias() ? &greater : &less, VS); // VS for unordered. 
break; } default: - LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + LOG(FATAL) << "Unexpected compare type " << type; } + __ b(&done, EQ); + __ b(&less, CC); // CC is for both: unsigned compare for longs and 'less than' for floats. + + __ Bind(&greater); + __ LoadImmediate(out, 1); + __ b(&done); + + __ Bind(&less); + __ LoadImmediate(out, -1); + + __ Bind(&done); } void LocationsBuilderARM::VisitPhi(HPhi* instruction) { @@ -2244,32 +2505,32 @@ void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); uint32_t offset = instruction->GetFieldOffset().Uint32Value(); Primitive::Type field_type = instruction->GetFieldType(); switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ StoreToOffset(kStoreByte, value, obj, offset); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ StoreToOffset(kStoreHalfword, value, obj, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ StoreToOffset(kStoreWord, value, obj, offset); if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - Register temp = locations->GetTemp(0).As<Register>(); - Register card = locations->GetTemp(1).As<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); 
codegen_->MarkGCCard(temp, card, obj, value); } break; @@ -2282,7 +2543,7 @@ void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr } case Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).As<SRegister>(); + SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); __ StoreSToOffset(value, obj, offset); break; } @@ -2308,37 +2569,37 @@ void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); uint32_t offset = instruction->GetFieldOffset().Uint32Value(); switch (instruction->GetType()) { case Primitive::kPrimBoolean: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); break; } case Primitive::kPrimByte: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadSignedByte, out, obj, offset); break; } case Primitive::kPrimShort: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); break; } case Primitive::kPrimChar: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadWord, out, obj, offset); break; } @@ -2351,7 +2612,7 @@ void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instr } case Primitive::kPrimFloat: { - 
SRegister out = locations->Out().As<SRegister>(); + SRegister out = locations->Out().AsFpuRegister<SRegister>(); __ LoadSFromOffset(out, obj, offset); break; } @@ -2385,7 +2646,7 @@ void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmp(obj.As<Register>(), ShifterOperand(0)); + __ cmp(obj.AsRegister<Register>(), ShifterOperand(0)); __ b(slow_path->GetEntryLabel(), EQ); } else { DCHECK(obj.IsConstant()) << obj; @@ -2404,18 +2665,19 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); switch (instruction->GetType()) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); } else { - __ add(IP, obj, ShifterOperand(index.As<Register>())); + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>())); __ LoadFromOffset(kLoadUnsignedByte, out, IP, data_offset); } break; @@ -2423,12 +2685,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = 
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; __ LoadFromOffset(kLoadSignedByte, out, obj, offset); } else { - __ add(IP, obj, ShifterOperand(index.As<Register>())); + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>())); __ LoadFromOffset(kLoadSignedByte, out, IP, data_offset); } break; @@ -2436,12 +2699,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); } else { - __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_2)); + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2)); __ LoadFromOffset(kLoadSignedHalfword, out, IP, data_offset); } break; @@ -2449,12 +2713,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset); } else { - __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_2)); + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, 
TIMES_2)); __ LoadFromOffset(kLoadUnsignedHalfword, out, IP, data_offset); } break; @@ -2464,12 +2729,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimNot: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; __ LoadFromOffset(kLoadWord, out, obj, offset); } else { - __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_4)); + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ LoadFromOffset(kLoadWord, out, IP, data_offset); } break; @@ -2479,10 +2745,11 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); Location out = locations->Out(); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset); } else { - __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_8)); + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset); } break; @@ -2527,7 +2794,7 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + 
Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type value_type = instruction->GetComponentType(); bool needs_runtime_call = locations->WillCall(); @@ -2538,12 +2805,13 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register value = locations->InAt(2).As<Register>(); + Register value = locations->InAt(2).AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; __ StoreToOffset(kStoreByte, value, obj, offset); } else { - __ add(IP, obj, ShifterOperand(index.As<Register>())); + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>())); __ StoreToOffset(kStoreByte, value, IP, data_offset); } break; @@ -2552,12 +2820,13 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimShort: case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register value = locations->InAt(2).As<Register>(); + Register value = locations->InAt(2).AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; __ StoreToOffset(kStoreHalfword, value, obj, offset); } else { - __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_2)); + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2)); __ StoreToOffset(kStoreHalfword, value, IP, data_offset); } break; @@ -2567,24 +2836,27 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { 
if (!needs_runtime_call) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register value = locations->InAt(2).As<Register>(); + Register value = locations->InAt(2).AsRegister<Register>(); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; __ StoreToOffset(kStoreWord, value, obj, offset); } else { DCHECK(index.IsRegister()) << index; - __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_4)); + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, value, IP, data_offset); } if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); - Register temp = locations->GetTemp(0).As<Register>(); - Register card = locations->GetTemp(1).As<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); codegen_->MarkGCCard(temp, card, obj, value); } } else { DCHECK_EQ(value_type, Primitive::kPrimNot); - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction, + instruction->GetDexPc()); } break; } @@ -2593,10 +2865,11 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); Location value = locations->InAt(2); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset); } else { - __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_8)); + __ add(IP, obj, 
ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), IP, data_offset); } break; @@ -2622,8 +2895,8 @@ void LocationsBuilderARM::VisitArrayLength(HArrayLength* instruction) { void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { LocationSummary* locations = instruction->GetLocations(); uint32_t offset = mirror::Array::LengthOffset().Uint32Value(); - Register obj = locations->InAt(0).As<Register>(); - Register out = locations->Out().As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadWord, out, obj, offset); } @@ -2643,8 +2916,8 @@ void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) { instruction, locations->InAt(0), locations->InAt(1)); codegen_->AddSlowPath(slow_path); - Register index = locations->InAt(0).As<Register>(); - Register length = locations->InAt(1).As<Register>(); + Register index = locations->InAt(0).AsRegister<Register>(); + Register length = locations->InAt(1).AsRegister<Register>(); __ cmp(index, ShifterOperand(length)); __ b(slow_path->GetEntryLabel(), CS); @@ -2725,15 +2998,15 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { if (source.IsRegister()) { if (destination.IsRegister()) { - __ Mov(destination.As<Register>(), source.As<Register>()); + __ Mov(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else { DCHECK(destination.IsStackSlot()); - __ StoreToOffset(kStoreWord, source.As<Register>(), + __ StoreToOffset(kStoreWord, source.AsRegister<Register>(), SP, destination.GetStackIndex()); } } else if (source.IsStackSlot()) { if (destination.IsRegister()) { - __ LoadFromOffset(kLoadWord, destination.As<Register>(), + __ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(), SP, source.GetStackIndex()); } else { DCHECK(destination.IsStackSlot()); @@ -2745,7 +3018,7 @@ void 
ParallelMoveResolverARM::EmitMove(size_t index) { DCHECK(source.GetConstant()->IsIntConstant()); int32_t value = source.GetConstant()->AsIntConstant()->GetValue(); if (destination.IsRegister()) { - __ LoadImmediate(destination.As<Register>(), value); + __ LoadImmediate(destination.AsRegister<Register>(), value); } else { DCHECK(destination.IsStackSlot()); __ LoadImmediate(IP, value); @@ -2777,15 +3050,15 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - DCHECK_NE(source.As<Register>(), IP); - DCHECK_NE(destination.As<Register>(), IP); - __ Mov(IP, source.As<Register>()); - __ Mov(source.As<Register>(), destination.As<Register>()); - __ Mov(destination.As<Register>(), IP); + DCHECK_NE(source.AsRegister<Register>(), IP); + DCHECK_NE(destination.AsRegister<Register>(), IP); + __ Mov(IP, source.AsRegister<Register>()); + __ Mov(source.AsRegister<Register>(), destination.AsRegister<Register>()); + __ Mov(destination.AsRegister<Register>(), IP); } else if (source.IsRegister() && destination.IsStackSlot()) { - Exchange(source.As<Register>(), destination.GetStackIndex()); + Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { - Exchange(destination.As<Register>(), source.GetStackIndex()); + Exchange(destination.AsRegister<Register>(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { Exchange(source.GetStackIndex(), destination.GetStackIndex()); } else { @@ -2811,7 +3084,7 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { } void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { - Register out = cls->GetLocations()->Out().As<Register>(); + Register out = cls->GetLocations()->Out().AsRegister<Register>(); if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); @@ -2851,7 
+3124,8 @@ void InstructionCodeGeneratorARM::VisitClinitCheck(HClinitCheck* check) { SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( check->GetLoadClass(), check, check->GetDexPc(), true); codegen_->AddSlowPath(slow_path); - GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).As<Register>()); + GenerateClassInitializationCheck(slow_path, + check->GetLocations()->InAt(0).AsRegister<Register>()); } void InstructionCodeGeneratorARM::GenerateClassInitializationCheck( @@ -2874,37 +3148,37 @@ void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).As<Register>(); + Register cls = locations->InAt(0).AsRegister<Register>(); uint32_t offset = instruction->GetFieldOffset().Uint32Value(); switch (instruction->GetType()) { case Primitive::kPrimBoolean: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadUnsignedByte, out, cls, offset); break; } case Primitive::kPrimByte: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadSignedByte, out, cls, offset); break; } case Primitive::kPrimShort: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadSignedHalfword, out, cls, offset); break; } case Primitive::kPrimChar: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadUnsignedHalfword, out, cls, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ LoadFromOffset(kLoadWord, out, cls, 
offset); break; } @@ -2917,7 +3191,7 @@ void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instructi } case Primitive::kPrimFloat: { - SRegister out = locations->Out().As<SRegister>(); + SRegister out = locations->Out().AsFpuRegister<SRegister>(); __ LoadSFromOffset(out, cls, offset); break; } @@ -2950,32 +3224,32 @@ void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).As<Register>(); + Register cls = locations->InAt(0).AsRegister<Register>(); uint32_t offset = instruction->GetFieldOffset().Uint32Value(); Primitive::Type field_type = instruction->GetFieldType(); switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ StoreToOffset(kStoreByte, value, cls, offset); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ StoreToOffset(kStoreHalfword, value, cls, offset); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ StoreToOffset(kStoreWord, value, cls, offset); if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - Register temp = locations->GetTemp(0).As<Register>(); - Register card = locations->GetTemp(1).As<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); codegen_->MarkGCCard(temp, card, cls, value); } break; @@ -2988,7 +3262,7 @@ void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instructi } case 
Primitive::kPrimFloat: { - SRegister value = locations->InAt(1).As<SRegister>(); + SRegister value = locations->InAt(1).AsFpuRegister<SRegister>(); __ StoreSToOffset(value, cls, offset); break; } @@ -3015,10 +3289,10 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); codegen_->AddSlowPath(slow_path); - Register out = load->GetLocations()->Out().As<Register>(); + Register out = load->GetLocations()->Out().AsRegister<Register>(); codegen_->LoadCurrentMethod(out); - __ LoadFromOffset( - kLoadWord, out, out, mirror::ArtMethod::DexCacheStringsOffset().Int32Value()); + __ LoadFromOffset(kLoadWord, out, out, mirror::ArtMethod::DeclaringClassOffset().Int32Value()); + __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); __ cmp(out, ShifterOperand(0)); __ b(slow_path->GetEntryLabel(), EQ); @@ -3032,7 +3306,7 @@ void LocationsBuilderARM::VisitLoadException(HLoadException* load) { } void InstructionCodeGeneratorARM::VisitLoadException(HLoadException* load) { - Register out = load->GetLocations()->Out().As<Register>(); + Register out = load->GetLocations()->Out().AsRegister<Register>(); int32_t offset = Thread::ExceptionOffset<kArmWordSize>().Int32Value(); __ LoadFromOffset(kLoadWord, out, TR, offset); __ LoadImmediate(IP, 0); @@ -3063,9 +3337,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); - Register cls = locations->InAt(1).As<Register>(); - Register out = locations->Out().As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); + Register cls = locations->InAt(1).AsRegister<Register>(); + Register out = 
locations->Out().AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); Label done, zero; SlowPathCodeARM* slow_path = nullptr; @@ -3110,9 +3384,9 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); - Register cls = locations->InAt(1).As<Register>(); - Register temp = locations->GetTemp(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); + Register cls = locations->InAt(1).AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM( @@ -3174,9 +3448,9 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr LocationSummary* locations = instruction->GetLocations(); if (instruction->GetResultType() == Primitive::kPrimInt) { - Register first = locations->InAt(0).As<Register>(); - Register second = locations->InAt(1).As<Register>(); - Register out = locations->Out().As<Register>(); + Register first = locations->InAt(0).AsRegister<Register>(); + Register second = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (instruction->IsAnd()) { __ and_(out, first, ShifterOperand(second)); } else if (instruction->IsOr()) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index acc3fd6a25..226e635d05 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -28,7 +28,8 @@ namespace arm { class CodeGeneratorARM; class SlowPathCodeARM; -static constexpr size_t kArmWordSize = 4; +// Use a local definition to prevent copying mistakes. 
+static constexpr size_t kArmWordSize = kArmPointerSize; static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 }; static constexpr RegisterPair kParameterCorePairRegisters[] = { R1_R2, R2_R3 }; @@ -108,6 +109,7 @@ class LocationsBuilderARM : public HGraphVisitor { private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); + void HandleShift(HBinaryOperation* operation); CodeGeneratorARM* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -135,6 +137,7 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); + void HandleShift(HBinaryOperation* operation); ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2c586a15f6..a61ef2d4f6 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -38,7 +38,7 @@ namespace art { namespace arm64 { -// TODO: clean-up some of the constant definitions. +static constexpr bool kExplicitStackOverflowCheck = false; static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>); static constexpr int kCurrentMethodStackOffset = 0; @@ -167,7 +167,7 @@ MemOperand StackOperandFrom(Location location) { return MemOperand(sp, location.GetStackIndex()); } -MemOperand HeapOperand(const Register& base, size_t offset) { +MemOperand HeapOperand(const Register& base, size_t offset = 0) { // A heap reference must be 32bit, so fit in a W register. 
DCHECK(base.IsW()); return MemOperand(base.X(), offset); @@ -393,6 +393,20 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64); }; +class StackOverflowCheckSlowPathARM64 : public SlowPathCodeARM64 { + public: + StackOverflowCheckSlowPathARM64() {} + + virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + __ Bind(GetEntryLabel()); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowStackOverflow), nullptr, 0); + } + + private: + DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM64); +}; + class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit SuspendCheckSlowPathARM64(HSuspendCheck* instruction, @@ -418,7 +432,6 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { return &return_label_; } - private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. @@ -437,7 +450,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); __ Brk(__LINE__); // TODO: Unimplemented TypeCheckSlowPathARM64. - __ b(GetExitLabel()); + __ B(GetExitLabel()); } private: @@ -479,13 +492,30 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph) #undef __ #define __ GetVIXLAssembler()-> +void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { + // Ensure we emit the literal pool. + __ FinalizeCode(); + CodeGenerator::Finalize(allocator); +} + void CodeGeneratorARM64::GenerateFrameEntry() { - // TODO: Add proper support for the stack overflow check. 
- UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireX(); - __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); - __ Ldr(temp, MemOperand(temp, 0)); - RecordPcInfo(nullptr, 0); + bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod(); + if (do_overflow_check) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireX(); + if (kExplicitStackOverflowCheck) { + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM64(); + AddSlowPath(slow_path); + + __ Ldr(temp, MemOperand(tr, Thread::StackEndOffset<kArm64WordSize>().Int32Value())); + __ Cmp(sp, temp); + __ B(lo, slow_path->GetEntryLabel()); + } else { + __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); + __ Ldr(wzr, MemOperand(temp, 0)); + RecordPcInfo(nullptr, 0); + } + } CPURegList preserved_regs = GetFramePreservedRegisters(); int frame_size = GetFrameSize(); @@ -588,12 +618,12 @@ Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const { void CodeGeneratorARM64::MarkGCCard(Register object, Register value) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register card = temps.AcquireX(); - Register temp = temps.AcquireX(); + Register temp = temps.AcquireW(); // Index within the CardTable - 32bit. vixl::Label done; __ Cbz(value, &done); __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64WordSize>().Int32Value())); __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); - __ Strb(card, MemOperand(card, temp)); + __ Strb(card, MemOperand(card, temp.X())); __ Bind(&done); } @@ -601,7 +631,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const { // Block reserved registers: // ip0 (VIXL temporary) // ip1 (VIXL temporary) - // xSuspend (Suspend counter) + // tr // lr // sp is not part of the allocatable registers, so we don't need to block it. 
// TODO: Avoid blocking callee-saved registers, and instead preserve them @@ -772,12 +802,14 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, uint32_t dex_pc) { __ Ldr(lr, MemOperand(tr, entry_point_offset)); __ Blr(lr); - RecordPcInfo(instruction, dex_pc); - DCHECK(instruction->IsSuspendCheck() - || instruction->IsBoundsCheck() - || instruction->IsNullCheck() - || instruction->IsDivZeroCheck() - || !IsLeafMethod()); + if (instruction != nullptr) { + RecordPcInfo(instruction, dex_pc); + DCHECK(instruction->IsSuspendCheck() + || instruction->IsBoundsCheck() + || instruction->IsNullCheck() + || instruction->IsDivZeroCheck() + || !IsLeafMethod()); + } } void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, @@ -787,12 +819,30 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset())); __ Cmp(temp, mirror::Class::kStatusInitialized); __ B(lt, slow_path->GetEntryLabel()); - // Even if the initialized flag is set, we may be in a situation where caches are not synced - // properly. Therefore, we do a memory fence. - __ Dmb(InnerShareable, BarrierAll); + // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
+ __ Dmb(InnerShareable, BarrierReads); __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + SuspendCheckSlowPathARM64* slow_path = + new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); + codegen_->AddSlowPath(slow_path); + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + Register temp = temps.AcquireW(); + + __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64WordSize>().SizeValue())); + if (successor == nullptr) { + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ Cbz(temp, codegen_->GetLabelOf(successor)); + __ B(slow_path->GetEntryLabel()); + // slow_path will return to GetLabelOf(successor). + } +} + InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen) : HGraphVisitor(graph), @@ -801,7 +851,6 @@ InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \ M(ParallelMove) \ - M(Rem) #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode @@ -894,6 +943,63 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { } } +void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) { + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + Primitive::Type type = instr->GetResultType(); + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); + locations->SetOut(Location::RequiresRegister()); + break; + } + default: + LOG(FATAL) << "Unexpected shift type " << type; + } +} + +void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) { + 
DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + + Primitive::Type type = instr->GetType(); + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + Register dst = OutputRegister(instr); + Register lhs = InputRegisterAt(instr, 0); + Operand rhs = InputOperandAt(instr, 1); + if (rhs.IsImmediate()) { + uint32_t shift_value = (type == Primitive::kPrimInt) + ? static_cast<uint32_t>(rhs.immediate() & kMaxIntShiftValue) + : static_cast<uint32_t>(rhs.immediate() & kMaxLongShiftValue); + if (instr->IsShl()) { + __ Lsl(dst, lhs, shift_value); + } else if (instr->IsShr()) { + __ Asr(dst, lhs, shift_value); + } else { + __ Lsr(dst, lhs, shift_value); + } + } else { + Register rhs_reg = dst.IsX() ? rhs.reg().X() : rhs.reg().W(); + + if (instr->IsShl()) { + __ Lsl(dst, lhs, rhs_reg); + } else if (instr->IsShr()) { + __ Asr(dst, lhs, rhs_reg); + } else { + __ Lsr(dst, lhs, rhs_reg); + } + } + break; + } + default: + LOG(FATAL) << "Unexpected shift operation type " << type; + } +} + void LocationsBuilderARM64::VisitAdd(HAdd* instruction) { HandleBinaryOp(instruction); } @@ -924,17 +1030,17 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Register obj = InputRegisterAt(instruction, 0); Location index = locations->InAt(1); size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); - MemOperand source(obj); + MemOperand source = HeapOperand(obj); UseScratchRegisterScope temps(GetVIXLAssembler()); if (index.IsConstant()) { offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); - source = MemOperand(obj, offset); + source = HeapOperand(obj, offset); } else { Register temp = temps.AcquireSameSizeAs(obj); Register index_reg = RegisterFrom(index, Primitive::kPrimInt); __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(type))); - source = MemOperand(temp, offset); + source = HeapOperand(temp, offset); } codegen_->Load(type, 
OutputCPURegister(instruction), source); @@ -979,17 +1085,17 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { CPURegister value = InputCPURegisterAt(instruction, 2); Location index = locations->InAt(1); size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); - MemOperand destination(obj); + MemOperand destination = HeapOperand(obj); UseScratchRegisterScope temps(GetVIXLAssembler()); if (index.IsConstant()) { offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); - destination = MemOperand(obj, offset); + destination = HeapOperand(obj, offset); } else { Register temp = temps.AcquireSameSizeAs(obj); Register index_reg = InputRegisterAt(instruction, 1); __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type))); - destination = MemOperand(temp, offset); + destination = HeapOperand(temp, offset); } codegen_->Store(value_type, value, destination); @@ -1056,29 +1162,59 @@ void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) { GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); } -void LocationsBuilderARM64::VisitCompare(HCompare* instruction) { +void LocationsBuilderARM64::VisitCompare(HCompare* compare) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + Primitive::Type in_type = compare->InputAt(0)->GetType(); + switch (in_type) { + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), 
Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + } + default: + LOG(FATAL) << "Unexpected type for compare operation " << in_type; + } } -void InstructionCodeGeneratorARM64::VisitCompare(HCompare* instruction) { - Primitive::Type in_type = instruction->InputAt(0)->GetType(); +void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { + Primitive::Type in_type = compare->InputAt(0)->GetType(); - DCHECK_EQ(in_type, Primitive::kPrimLong); + // 0 if: left == right + // 1 if: left > right + // -1 if: left < right switch (in_type) { case Primitive::kPrimLong: { - vixl::Label done; - Register result = OutputRegister(instruction); - Register left = InputRegisterAt(instruction, 0); - Operand right = InputOperandAt(instruction, 1); - __ Subs(result.X(), left, right); - __ B(eq, &done); - __ Mov(result, 1); - __ Cneg(result, result, le); - __ Bind(&done); + Register result = OutputRegister(compare); + Register left = InputRegisterAt(compare, 0); + Operand right = InputOperandAt(compare, 1); + + __ Cmp(left, right); + __ Cset(result, ne); + __ Cneg(result, result, lt); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + Register result = OutputRegister(compare); + FPRegister left = InputFPRegisterAt(compare, 0); + FPRegister right = InputFPRegisterAt(compare, 1); + + __ Fcmp(left, right); + if (compare->IsGtBias()) { + __ Cset(result, ne); + } else { + __ Csetm(result, ne); + } + __ Cneg(result, result, compare->IsGtBias() ? 
mi : gt); break; } default: @@ -1107,7 +1243,7 @@ void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) { Condition cond = ARM64Condition(instruction->GetCondition()); __ Cmp(lhs, rhs); - __ Csel(res, vixl::Assembler::AppropriateZeroRegFor(res), Operand(1), InvertCondition(cond)); + __ Cset(res, cond); } #define FOR_EACH_CONDITION_INSTRUCTION(M) \ @@ -1232,8 +1368,20 @@ void LocationsBuilderARM64::VisitGoto(HGoto* got) { void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { HBasicBlock* successor = got->GetSuccessor(); - // TODO: Support for suspend checks emission. - if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { + DCHECK(!successor->IsExitBlock()); + HBasicBlock* block = got->GetBlock(); + HInstruction* previous = got->GetPrevious(); + HLoopInformation* info = block->GetLoopInformation(); + + if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) { + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; + } + if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(block, successor)) { __ B(codegen_->GetLabelOf(successor)); } } @@ -1241,27 +1389,32 @@ void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { void LocationsBuilderARM64::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); HInstruction* cond = if_instr->InputAt(0); - DCHECK(cond->IsCondition()); - if (cond->AsCondition()->NeedsMaterialization()) { + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { locations->SetInAt(0, Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { HInstruction* cond = if_instr->InputAt(0); - DCHECK(cond->IsCondition()); HCondition* condition = cond->AsCondition(); vixl::Label* 
true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); - // TODO: Support constant condition input in VisitIf. - - if (condition->NeedsMaterialization()) { + if (cond->IsIntConstant()) { + int32_t cond_value = cond->AsIntConstant()->GetValue(); + if (cond_value == 1) { + if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { + __ B(true_target); + } + return; + } else { + DCHECK_EQ(cond_value, 0); + } + } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { // The condition instruction has been materialized, compare the output to 0. Location cond_val = if_instr->GetLocations()->InAt(0); DCHECK(cond_val.IsRegister()); __ Cbnz(InputRegisterAt(if_instr, 0), true_target); - } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. @@ -1279,7 +1432,6 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { __ B(arm64_cond, true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { __ B(false_target); } @@ -1292,8 +1444,7 @@ void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction } void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - MemOperand field = MemOperand(InputRegisterAt(instruction, 0), - instruction->GetFieldOffset().Uint32Value()); + MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset()); codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); } @@ -1307,7 +1458,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins Primitive::Type field_type = instruction->GetFieldType(); CPURegister value = InputCPURegisterAt(instruction, 1); Register obj = InputRegisterAt(instruction, 0); - codegen_->Store(field_type, value, MemOperand(obj, 
instruction->GetFieldOffset().Uint32Value())); + codegen_->Store(field_type, value, HeapOperand(obj, instruction->GetFieldOffset())); if (field_type == Primitive::kPrimNot) { codegen_->MarkGCCard(obj, Register(value)); } @@ -1336,7 +1487,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cbz(obj, &done); // Compare the class of `obj` with `cls`. - __ Ldr(out, MemOperand(obj, mirror::Object::ClassOffset().Int32Value())); + __ Ldr(out, HeapOperand(obj, mirror::Object::ClassOffset())); __ Cmp(out, cls); if (instruction->IsClassFinal()) { // Classes must be equal for the instanceof to succeed. @@ -1393,7 +1544,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry); Location receiver = invoke->GetLocations()->InAt(0); Offset class_offset = mirror::Object::ClassOffset(); - Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); + Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); // The register ip1 is required to be used for the hidden argument in // art_quick_imt_conflict_trampoline, so prevent VIXL from using it. 
@@ -1443,14 +1594,12 @@ void InstructionCodeGeneratorARM64::VisitInvokeStatic(HInvokeStatic* invoke) { // temp = method; codegen_->LoadCurrentMethod(temp); // temp = temp->dex_cache_resolved_methods_; - __ Ldr(temp, MemOperand(temp.X(), - mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset())); // temp = temp[index_in_cache]; - __ Ldr(temp, MemOperand(temp.X(), index_in_cache)); + __ Ldr(temp, HeapOperand(temp, index_in_cache)); // lr = temp->entry_point_from_quick_compiled_code_; - __ Ldr(lr, MemOperand(temp.X(), - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64PointerSize).SizeValue())); + __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64WordSize))); // lr(); __ Blr(lr); @@ -1461,24 +1610,24 @@ void InstructionCodeGeneratorARM64::VisitInvokeStatic(HInvokeStatic* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); - Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); + Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0)); size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); Offset class_offset = mirror::Object::ClassOffset(); - Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); + Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); // temp = object->GetClass(); if (receiver.IsStackSlot()) { - __ Ldr(temp.W(), MemOperand(sp, receiver.GetStackIndex())); - __ Ldr(temp.W(), MemOperand(temp, class_offset.SizeValue())); + __ Ldr(temp, MemOperand(sp, receiver.GetStackIndex())); + __ Ldr(temp, HeapOperand(temp, class_offset)); } else { DCHECK(receiver.IsRegister()); - __ Ldr(temp.W(), 
HeapOperandFrom(receiver, class_offset)); + __ Ldr(temp, HeapOperandFrom(receiver, class_offset)); } // temp = temp->GetMethodAt(method_offset); - __ Ldr(temp.W(), MemOperand(temp, method_offset)); + __ Ldr(temp, HeapOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); - __ Ldr(lr, MemOperand(temp, entry_point.SizeValue())); + __ Ldr(lr, HeapOperand(temp, entry_point.SizeValue())); // lr(); __ Blr(lr); DCHECK(!codegen_->IsLeafMethod()); @@ -1503,7 +1652,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { DCHECK(cls->CanCallRuntime()); codegen_->LoadCurrentMethod(out); __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DexCacheResolvedTypesOffset())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); + __ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); @@ -1550,8 +1699,9 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { Register out = OutputRegister(load); codegen_->LoadCurrentMethod(out); - __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DexCacheStringsOffset())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()))); + __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DeclaringClassOffset())); + __ Ldr(out, HeapOperand(out, mirror::Class::DexCacheStringsOffset())); + __ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); __ Cbz(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -1793,6 +1943,43 @@ void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction) { LOG(FATAL) << "Unreachable"; } +void LocationsBuilderARM64::VisitRem(HRem* rem) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); + switch (rem->GetResultType()) { + case Primitive::kPrimInt: + 
case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { + Primitive::Type type = rem->GetResultType(); + switch (type) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register dividend = InputRegisterAt(rem, 0); + Register divisor = InputRegisterAt(rem, 1); + Register output = OutputRegister(rem); + Register temp = temps.AcquireSameSizeAs(output); + + __ Sdiv(temp, dividend, divisor); + __ Msub(output, temp, divisor, dividend); + break; + } + + default: + LOG(FATAL) << "Unexpected rem type " << type; + } +} + void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); Primitive::Type return_type = instruction->InputAt(0)->GetType(); @@ -1815,6 +2002,22 @@ void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) { __ Br(lr); } +void LocationsBuilderARM64::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void LocationsBuilderARM64::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) { + HandleShift(shr); +} + void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store); Primitive::Type field_type = store->InputAt(1)->GetType(); @@ -1859,9 +2062,8 @@ void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { } void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - Register cls = InputRegisterAt(instruction, 0); 
- uint32_t offset = instruction->GetFieldOffset().Uint32Value(); - codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), MemOperand(cls, offset)); + MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset()); + codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field); } void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { @@ -1874,10 +2076,10 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { CPURegister value = InputCPURegisterAt(instruction, 1); Register cls = InputRegisterAt(instruction, 0); - uint32_t offset = instruction->GetFieldOffset().Uint32Value(); + Offset offset = instruction->GetFieldOffset(); Primitive::Type field_type = instruction->GetFieldType(); - codegen_->Store(field_type, value, MemOperand(cls, offset)); + codegen_->Store(field_type, value, HeapOperand(cls, offset)); if (field_type == Primitive::kPrimNot) { codegen_->MarkGCCard(cls, Register(value)); } @@ -1888,14 +2090,17 @@ void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { } void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) { - // TODO: Improve support for suspend checks. - SuspendCheckSlowPathARM64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, nullptr); - codegen_->AddSlowPath(slow_path); - - __ Subs(wSuspend, wSuspend, 1); - __ B(slow_path->GetEntryLabel(), le); - __ Bind(slow_path->GetReturnLabel()); + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. + return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. 
+ return; + } + GenerateSuspendCheck(instruction, nullptr); } void LocationsBuilderARM64::VisitTemporary(HTemporary* temp) { @@ -1924,6 +2129,7 @@ void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) { new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); Primitive::Type input_type = conversion->GetInputType(); Primitive::Type result_type = conversion->GetResultType(); + DCHECK_NE(input_type, result_type); if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; @@ -1952,17 +2158,34 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers int result_size = Primitive::ComponentSize(result_type); int input_size = Primitive::ComponentSize(input_type); int min_size = kBitsPerByte * std::min(result_size, input_size); + Register output = OutputRegister(conversion); + Register source = InputRegisterAt(conversion, 0); if ((result_type == Primitive::kPrimChar) || ((input_type == Primitive::kPrimChar) && (result_size > input_size))) { - __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, min_size); + __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size); } else { - __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, min_size); + __ Sbfx(output, output.IsX() ? 
source.X() : source.W(), 0, min_size); } - return; + } else if (IsFPType(result_type) && IsIntegralType(input_type)) { + CHECK(input_type == Primitive::kPrimInt || input_type == Primitive::kPrimLong); + __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0)); + } else if (IsIntegralType(result_type) && IsFPType(input_type)) { + CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); + __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0)); + } else if (IsFPType(result_type) && IsFPType(input_type)) { + __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0)); + } else { + LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type + << " to " << result_type; } +} + +void LocationsBuilderARM64::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} - LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type - << " to " << result_type; +void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) { + HandleShift(ushr); } void LocationsBuilderARM64::VisitXor(HXor* instruction) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 6b71b94532..0e3d25f9aa 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -31,7 +31,9 @@ namespace arm64 { class CodeGeneratorARM64; class SlowPathCodeARM64; -static constexpr size_t kArm64WordSize = 8; +// Use a local definition to prevent copying mistakes. 
+static constexpr size_t kArm64WordSize = kArm64PointerSize; + static const vixl::Register kParameterCoreRegisters[] = { vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 }; @@ -42,12 +44,10 @@ static const vixl::FPRegister kParameterFPRegisters[] = { static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters); const vixl::Register tr = vixl::x18; // Thread Register -const vixl::Register wSuspend = vixl::w19; // Suspend Register -const vixl::Register xSuspend = vixl::x19; const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1); const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31); -const vixl::CPURegList runtime_reserved_core_registers(tr, xSuspend, vixl::lr); +const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr); const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister, vixl::kXRegSize, kArm64CalleeSaveRefSpills); @@ -108,7 +108,9 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg); + void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); + void HandleShift(HBinaryOperation* instr); Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; @@ -128,6 +130,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { private: void HandleBinaryOp(HBinaryOperation* instr); + void HandleShift(HBinaryOperation* instr); void HandleInvoke(HInvoke* instr); CodeGeneratorARM64* const codegen_; @@ -230,6 +233,8 @@ class CodeGeneratorARM64 : public CodeGenerator { } } + void Finalize(CodeAllocator* allocator) OVERRIDE; + // Code generation helpers. 
void MoveConstant(vixl::CPURegister destination, HConstant* constant); void MoveHelper(Location destination, Location source, Primitive::Type type); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index b0f36ce0b4..fd794f95d1 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -143,7 +143,9 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { BoundsCheckSlowPathX86(HBoundsCheck* instruction, Location index_location, Location length_location) - : instruction_(instruction), index_location_(index_location), length_location_(length_location) {} + : instruction_(instruction), + index_location_(index_location), + length_location_(length_location) {} virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); @@ -311,7 +313,8 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { Location::RegisterLocation(calling_convention.GetRegisterAt(1))); if (instruction_->IsInstanceOf()) { - __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInstanceofNonTrivial))); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, + pInstanceofNonTrivial))); } else { DCHECK(instruction_->IsCheckCast()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pCheckCast))); @@ -464,7 +467,8 @@ void CodeGeneratorX86::GenerateFrameEntry() { static const int kFakeReturnRegister = 8; core_spill_mask_ |= (1 << kFakeReturnRegister); - bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); + bool skip_overflow_check = + IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); if (!skip_overflow_check && !kExplicitStackOverflowCheck) { __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86)))); RecordPcInfo(nullptr, 0); @@ -567,28 +571,28 @@ void 
CodeGeneratorX86::Move32(Location destination, Location source) { } if (destination.IsRegister()) { if (source.IsRegister()) { - __ movl(destination.As<Register>(), source.As<Register>()); + __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsFpuRegister()) { - __ movd(destination.As<Register>(), source.As<XmmRegister>()); + __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>()); } else { DCHECK(source.IsStackSlot()); - __ movl(destination.As<Register>(), Address(ESP, source.GetStackIndex())); + __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex())); } } else if (destination.IsFpuRegister()) { if (source.IsRegister()) { - __ movd(destination.As<XmmRegister>(), source.As<Register>()); + __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>()); } else if (source.IsFpuRegister()) { - __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>()); + __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); } else { DCHECK(source.IsStackSlot()); - __ movss(destination.As<XmmRegister>(), Address(ESP, source.GetStackIndex())); + __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); } } else { DCHECK(destination.IsStackSlot()) << destination; if (source.IsRegister()) { - __ movl(Address(ESP, destination.GetStackIndex()), source.As<Register>()); + __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>()); } else if (source.IsFpuRegister()) { - __ movss(Address(ESP, destination.GetStackIndex()), source.As<XmmRegister>()); + __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); } else { DCHECK(source.IsStackSlot()); __ pushl(Address(ESP, source.GetStackIndex())); @@ -603,19 +607,25 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { } if (destination.IsRegisterPair()) { if (source.IsRegisterPair()) { - __ 
movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>()); - __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>()); + EmitParallelMoves( + Location::RegisterLocation(source.AsRegisterPairHigh<Register>()), + Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()), + Location::RegisterLocation(source.AsRegisterPairLow<Register>()), + Location::RegisterLocation(destination.AsRegisterPairLow<Register>())); } else if (source.IsFpuRegister()) { LOG(FATAL) << "Unimplemented"; } else if (source.IsQuickParameter()) { uint16_t register_index = source.GetQuickParameterRegisterIndex(); uint16_t stack_index = source.GetQuickParameterStackIndex(); InvokeDexCallingConvention calling_convention; - __ movl(destination.AsRegisterPairLow<Register>(), - calling_convention.GetRegisterAt(register_index)); - __ movl(destination.AsRegisterPairHigh<Register>(), Address(ESP, - calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize())); + EmitParallelMoves( + Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)), + Location::RegisterLocation(destination.AsRegisterPairLow<Register>()), + Location::StackSlot( + calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()), + Location::RegisterLocation(destination.AsRegisterPairHigh<Register>())); } else { + // No conflict possible, so just do the moves. 
DCHECK(source.IsDoubleStackSlot()); __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex())); __ movl(destination.AsRegisterPairHigh<Register>(), @@ -625,47 +635,52 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { InvokeDexCallingConvention calling_convention; uint16_t register_index = destination.GetQuickParameterRegisterIndex(); uint16_t stack_index = destination.GetQuickParameterStackIndex(); - if (source.IsRegister()) { - __ movl(calling_convention.GetRegisterAt(register_index), source.AsRegisterPairLow<Register>()); - __ movl(Address(ESP, calling_convention.GetStackOffsetOf(stack_index + 1)), - source.AsRegisterPairHigh<Register>()); + if (source.IsRegisterPair()) { + LOG(FATAL) << "Unimplemented"; } else if (source.IsFpuRegister()) { LOG(FATAL) << "Unimplemented"; } else { DCHECK(source.IsDoubleStackSlot()); - __ movl(calling_convention.GetRegisterAt(register_index), - Address(ESP, source.GetStackIndex())); - __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize))); - __ popl(Address(ESP, calling_convention.GetStackOffsetOf(stack_index + 1))); + EmitParallelMoves( + Location::StackSlot(source.GetStackIndex()), + Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)), + Location::StackSlot(source.GetHighStackIndex(kX86WordSize)), + Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1))); } } else if (destination.IsFpuRegister()) { if (source.IsDoubleStackSlot()) { - __ movsd(destination.As<XmmRegister>(), Address(ESP, source.GetStackIndex())); + __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); } else { LOG(FATAL) << "Unimplemented"; } } else { DCHECK(destination.IsDoubleStackSlot()) << destination; if (source.IsRegisterPair()) { + // No conflict possible, so just do the moves. 
__ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>()); __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), source.AsRegisterPairHigh<Register>()); } else if (source.IsQuickParameter()) { + // No conflict possible, so just do the move. InvokeDexCallingConvention calling_convention; uint16_t register_index = source.GetQuickParameterRegisterIndex(); uint16_t stack_index = source.GetQuickParameterStackIndex(); + // Just move the low part. The only time a source is a quick parameter is + // when moving the parameter to its stack locations. And the (Java) caller + // of this method has already done that. __ movl(Address(ESP, destination.GetStackIndex()), calling_convention.GetRegisterAt(register_index)); DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(), static_cast<size_t>(destination.GetHighStackIndex(kX86WordSize))); } else if (source.IsFpuRegister()) { - __ movsd(Address(ESP, destination.GetStackIndex()), source.As<XmmRegister>()); + __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); } else { DCHECK(source.IsDoubleStackSlot()); - __ pushl(Address(ESP, source.GetStackIndex())); - __ popl(Address(ESP, destination.GetStackIndex())); - __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize))); - __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize))); + EmitParallelMoves( + Location::StackSlot(source.GetStackIndex()), + Location::StackSlot(destination.GetStackIndex()), + Location::StackSlot(source.GetHighStackIndex(kX86WordSize)), + Location::StackSlot(destination.GetHighStackIndex(kX86WordSize))); } } } @@ -681,7 +696,7 @@ void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstr if (const_to_move->IsIntConstant()) { Immediate imm(const_to_move->AsIntConstant()->GetValue()); if (location.IsRegister()) { - __ movl(location.As<Register>(), imm); + __ movl(location.AsRegister<Register>(), imm); } else if 
(location.IsStackSlot()) { __ movl(Address(ESP, location.GetStackIndex()), imm); } else { @@ -695,7 +710,8 @@ void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstr __ movl(location.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value))); } else if (location.IsDoubleStackSlot()) { __ movl(Address(ESP, location.GetStackIndex()), Immediate(Low32Bits(value))); - __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value))); + __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), + Immediate(High32Bits(value))); } else { DCHECK(location.IsConstant()); DCHECK_EQ(location.GetConstant(), instruction); @@ -828,7 +844,7 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { // Materialized condition, compare against 0. Location lhs = if_instr->GetLocations()->InAt(0); if (lhs.IsRegister()) { - __ cmpl(lhs.As<Register>(), Immediate(0)); + __ cmpl(lhs.AsRegister<Register>(), Immediate(0)); } else { __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); } @@ -843,13 +859,13 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { // LHS is guaranteed to be in a register (see // LocationsBuilderX86::VisitCondition). 
if (rhs.IsRegister()) { - __ cmpl(lhs.As<Register>(), rhs.As<Register>()); + __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); } else if (rhs.IsConstant()) { HIntConstant* instruction = rhs.GetConstant()->AsIntConstant(); Immediate imm(instruction->AsIntConstant()->GetValue()); - __ cmpl(lhs.As<Register>(), imm); + __ cmpl(lhs.AsRegister<Register>(), imm); } else { - __ cmpl(lhs.As<Register>(), Address(ESP, rhs.GetStackIndex())); + __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } __ j(X86Condition(cond->AsCondition()->GetCondition()), codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); @@ -920,18 +936,18 @@ void LocationsBuilderX86::VisitCondition(HCondition* comp) { void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) { if (comp->NeedsMaterialization()) { LocationSummary* locations = comp->GetLocations(); - Register reg = locations->Out().As<Register>(); + Register reg = locations->Out().AsRegister<Register>(); // Clear register: setcc only sets the low byte. 
__ xorl(reg, reg); if (locations->InAt(1).IsRegister()) { - __ cmpl(locations->InAt(0).As<Register>(), - locations->InAt(1).As<Register>()); + __ cmpl(locations->InAt(0).AsRegister<Register>(), + locations->InAt(1).AsRegister<Register>()); } else if (locations->InAt(1).IsConstant()) { HConstant* instruction = locations->InAt(1).GetConstant(); Immediate imm(instruction->AsIntConstant()->GetValue()); - __ cmpl(locations->InAt(0).As<Register>(), imm); + __ cmpl(locations->InAt(0).AsRegister<Register>(), imm); } else { - __ cmpl(locations->InAt(0).As<Register>(), + __ cmpl(locations->InAt(0).AsRegister<Register>(), Address(ESP, locations->InAt(1).GetStackIndex())); } __ setb(X86Condition(comp->GetCondition()), reg); @@ -1078,7 +1094,7 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimNot: - DCHECK_EQ(ret->GetLocations()->InAt(0).As<Register>(), EAX); + DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX); break; case Primitive::kPrimLong: @@ -1088,7 +1104,7 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK_EQ(ret->GetLocations()->InAt(0).As<XmmRegister>(), XMM0); + DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0); break; default: @@ -1104,7 +1120,7 @@ void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) { } void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) { - Register temp = invoke->GetLocations()->GetTemp(0).As<Register>(); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); // TODO: Implement all kinds of calls: // 1) boot -> boot @@ -1120,7 +1136,8 @@ void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) { // temp = temp[index_in_cache] __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()))); // (temp + offset_of_quick_compiled_code)() - __ 
call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())); + __ call(Address( + temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -1168,7 +1185,7 @@ void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) { } void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { - Register temp = invoke->GetLocations()->GetTemp(0).As<Register>(); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); LocationSummary* locations = invoke->GetLocations(); @@ -1179,12 +1196,13 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { __ movl(temp, Address(ESP, receiver.GetStackIndex())); __ movl(temp, Address(temp, class_offset)); } else { - __ movl(temp, Address(receiver.As<Register>(), class_offset)); + __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); } // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())); + __ call(Address( + temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -1198,7 +1216,7 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
- Register temp = invoke->GetLocations()->GetTemp(0).As<Register>(); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() + (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry); LocationSummary* locations = invoke->GetLocations(); @@ -1207,20 +1225,20 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) // Set the hidden argument. __ movl(temp, Immediate(invoke->GetDexMethodIndex())); - __ movd(invoke->GetLocations()->GetTemp(1).As<XmmRegister>(), temp); + __ movd(invoke->GetLocations()->GetTemp(1).AsFpuRegister<XmmRegister>(), temp); // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ movl(temp, Address(ESP, receiver.GetStackIndex())); __ movl(temp, Address(temp, class_offset)); } else { - __ movl(temp, Address(receiver.As<Register>(), class_offset)); + __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); } // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86PointerSize).Int32Value())); + kX86WordSize).Int32Value())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -1237,11 +1255,16 @@ void LocationsBuilderX86::VisitNeg(HNeg* neg) { break; case Primitive::kPrimFloat: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); - // Output overlaps as we need a fresh (zero-initialized) - // register to perform subtraction from zero. 
- locations->SetOut(Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); break; default: @@ -1257,7 +1280,7 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { case Primitive::kPrimInt: DCHECK(in.IsRegister()); DCHECK(in.Equals(out)); - __ negl(out.As<Register>()); + __ negl(out.AsRegister<Register>()); break; case Primitive::kPrimLong: @@ -1273,21 +1296,29 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { __ negl(out.AsRegisterPairHigh<Register>()); break; - case Primitive::kPrimFloat: - DCHECK(!in.Equals(out)); - // out = 0 - __ xorps(out.As<XmmRegister>(), out.As<XmmRegister>()); - // out = out - in - __ subss(out.As<XmmRegister>(), in.As<XmmRegister>()); + case Primitive::kPrimFloat: { + DCHECK(in.Equals(out)); + Register constant = locations->GetTemp(0).AsRegister<Register>(); + XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + // Implement float negation with an exclusive or with value + // 0x80000000 (mask for bit 31, representing the sign of a + // single-precision floating-point number). + __ movl(constant, Immediate(INT32_C(0x80000000))); + __ movd(mask, constant); + __ xorps(out.AsFpuRegister<XmmRegister>(), mask); break; + } - case Primitive::kPrimDouble: - DCHECK(!in.Equals(out)); - // out = 0 - __ xorpd(out.As<XmmRegister>(), out.As<XmmRegister>()); - // out = out - in - __ subsd(out.As<XmmRegister>(), in.As<XmmRegister>()); + case Primitive::kPrimDouble: { + DCHECK(in.Equals(out)); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // Implement double negation with an exclusive or with value + // 0x8000000000000000 (mask for bit 63, representing the sign of + // a double-precision floating-point number). 
+ __ LoadLongConstant(mask, INT64_C(0x8000000000000000)); + __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); break; + } default: LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); @@ -1299,6 +1330,7 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); + DCHECK_NE(result_type, input_type); switch (result_type) { case Primitive::kPrimByte: switch (input_type) { @@ -1380,7 +1412,6 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: - case Primitive::kPrimChar: // Processing a Dex `int-to-char' instruction. locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -1404,6 +1435,13 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimLong: + // Processing a Dex `long-to-float' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimDouble: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1427,6 +1465,13 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimLong: + // Processing a Dex `long-to-double' instruction. 
+ locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1450,6 +1495,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio Location in = locations->InAt(0); Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); + DCHECK_NE(result_type, input_type); switch (result_type) { case Primitive::kPrimByte: switch (input_type) { @@ -1458,13 +1504,13 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimChar: // Processing a Dex `int-to-byte' instruction. if (in.IsRegister()) { - __ movsxb(out.As<Register>(), in.As<ByteRegister>()); + __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>()); } else if (in.IsStackSlot()) { - __ movsxb(out.As<Register>(), Address(ESP, in.GetStackIndex())); + __ movsxb(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsIntConstant()); int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); - __ movl(out.As<Register>(), Immediate(static_cast<int8_t>(value))); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value))); } break; @@ -1481,13 +1527,13 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimChar: // Processing a Dex `int-to-short' instruction. 
if (in.IsRegister()) { - __ movsxw(out.As<Register>(), in.As<Register>()); + __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>()); } else if (in.IsStackSlot()) { - __ movsxw(out.As<Register>(), Address(ESP, in.GetStackIndex())); + __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsIntConstant()); int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); - __ movl(out.As<Register>(), Immediate(static_cast<int16_t>(value))); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value))); } break; @@ -1502,14 +1548,14 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimLong: // Processing a Dex `long-to-int' instruction. if (in.IsRegisterPair()) { - __ movl(out.As<Register>(), in.AsRegisterPairLow<Register>()); + __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); } else if (in.IsDoubleStackSlot()) { - __ movl(out.As<Register>(), Address(ESP, in.GetStackIndex())); + __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); } else { DCHECK(in.IsConstant()); DCHECK(in.GetConstant()->IsLongConstant()); int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); - __ movl(out.As<Register>(), Immediate(static_cast<int32_t>(value))); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value))); } break; @@ -1534,7 +1580,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio // Processing a Dex `int-to-long' instruction. 
DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX); DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX); - DCHECK_EQ(in.As<Register>(), EAX); + DCHECK_EQ(in.AsRegister<Register>(), EAX); __ cdq(); break; @@ -1555,16 +1601,15 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: - case Primitive::kPrimChar: // Processing a Dex `Process a Dex `int-to-char'' instruction. if (in.IsRegister()) { - __ movzxw(out.As<Register>(), in.As<Register>()); + __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>()); } else if (in.IsStackSlot()) { - __ movzxw(out.As<Register>(), Address(ESP, in.GetStackIndex())); + __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsIntConstant()); int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); - __ movl(out.As<Register>(), Immediate(static_cast<uint16_t>(value))); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value))); } break; @@ -1576,15 +1621,48 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimFloat: switch (input_type) { - // Processing a Dex `int-to-float' instruction. case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimChar: - __ cvtsi2ss(out.As<XmmRegister>(), in.As<Register>()); + // Processing a Dex `int-to-float' instruction. + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>()); break; - case Primitive::kPrimLong: + case Primitive::kPrimLong: { + // Processing a Dex `long-to-float' instruction. 
+ Register low = in.AsRegisterPairLow<Register>(); + Register high = in.AsRegisterPairHigh<Register>(); + XmmRegister result = out.AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + + // Operations use doubles for precision reasons (each 32-bit + // half of a long fits in the 53-bit mantissa of a double, + // but not in the 24-bit mantissa of a float). This is + // especially important for the low bits. The result is + // eventually converted to float. + + // low = low - 2^31 (to prevent bit 31 of `low` to be + // interpreted as a sign bit) + __ subl(low, Immediate(0x80000000)); + // temp = int-to-double(high) + __ cvtsi2sd(temp, high); + // temp = temp * 2^32 + __ LoadLongConstant(constant, k2Pow32EncodingForDouble); + __ mulsd(temp, constant); + // result = int-to-double(low) + __ cvtsi2sd(result, low); + // result = result + 2^31 (restore the original value of `low`) + __ LoadLongConstant(constant, k2Pow31EncodingForDouble); + __ addsd(result, constant); + // result = result + temp + __ addsd(result, temp); + // result = double-to-float(result) + __ cvtsd2ss(result, result); + break; + } + case Primitive::kPrimDouble: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1598,15 +1676,40 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio case Primitive::kPrimDouble: switch (input_type) { - // Processing a Dex `int-to-double' instruction. case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimChar: - __ cvtsi2sd(out.As<XmmRegister>(), in.As<Register>()); + // Processing a Dex `int-to-double' instruction. + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>()); break; - case Primitive::kPrimLong: + case Primitive::kPrimLong: { + // Processing a Dex `long-to-double' instruction. 
+ Register low = in.AsRegisterPairLow<Register>(); + Register high = in.AsRegisterPairHigh<Register>(); + XmmRegister result = out.AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + + // low = low - 2^31 (to prevent bit 31 of `low` to be + // interpreted as a sign bit) + __ subl(low, Immediate(0x80000000)); + // temp = int-to-double(high) + __ cvtsi2sd(temp, high); + // temp = temp * 2^32 + __ LoadLongConstant(constant, k2Pow32EncodingForDouble); + __ mulsd(temp, constant); + // result = int-to-double(low) + __ cvtsi2sd(result, low); + // result = result + 2^31 (restore the original value of `low`) + __ LoadLongConstant(constant, k2Pow31EncodingForDouble); + __ addsd(result, constant); + // result = result + temp + __ addsd(result, temp); + break; + } + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1658,11 +1761,12 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { switch (add->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ addl(first.As<Register>(), second.As<Register>()); + __ addl(first.AsRegister<Register>(), second.AsRegister<Register>()); } else if (second.IsConstant()) { - __ addl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + __ addl(first.AsRegister<Register>(), + Immediate(second.GetConstant()->AsIntConstant()->GetValue())); } else { - __ addl(first.As<Register>(), Address(ESP, second.GetStackIndex())); + __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); } break; } @@ -1681,18 +1785,18 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { case Primitive::kPrimFloat: { if (second.IsFpuRegister()) { - __ addss(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ addss(first.AsFpuRegister<XmmRegister>(), 
second.AsFpuRegister<XmmRegister>()); } else { - __ addss(first.As<XmmRegister>(), Address(ESP, second.GetStackIndex())); + __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); } break; } case Primitive::kPrimDouble: { if (second.IsFpuRegister()) { - __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else { - __ addsd(first.As<XmmRegister>(), Address(ESP, second.GetStackIndex())); + __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); } break; } @@ -1734,11 +1838,12 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { switch (sub->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ subl(first.As<Register>(), second.As<Register>()); + __ subl(first.AsRegister<Register>(), second.AsRegister<Register>()); } else if (second.IsConstant()) { - __ subl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + __ subl(first.AsRegister<Register>(), + Immediate(second.GetConstant()->AsIntConstant()->GetValue())); } else { - __ subl(first.As<Register>(), Address(ESP, second.GetStackIndex())); + __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); } break; } @@ -1756,12 +1861,12 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { } case Primitive::kPrimFloat: { - __ subss(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - __ subsd(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } @@ -1816,13 +1921,13 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { switch (mul->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ imull(first.As<Register>(), second.As<Register>()); + __ 
imull(first.AsRegister<Register>(), second.AsRegister<Register>()); } else if (second.IsConstant()) { Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ imull(first.As<Register>(), imm); + __ imull(first.AsRegister<Register>(), imm); } else { DCHECK(second.IsStackSlot()); - __ imull(first.As<Register>(), Address(ESP, second.GetStackIndex())); + __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); } break; } @@ -1834,8 +1939,8 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { Register in1_lo = first.AsRegisterPairLow<Register>(); Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize)); Address in2_lo(ESP, second.GetStackIndex()); - Register eax = locations->GetTemp(0).As<Register>(); - Register edx = locations->GetTemp(1).As<Register>(); + Register eax = locations->GetTemp(0).AsRegister<Register>(); + Register edx = locations->GetTemp(1).AsRegister<Register>(); DCHECK_EQ(EAX, eax); DCHECK_EQ(EDX, edx); @@ -1866,12 +1971,12 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } case Primitive::kPrimFloat: { - __ mulss(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - __ mulsd(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } @@ -1891,12 +1996,13 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr switch (instruction->GetResultType()) { case Primitive::kPrimInt: { - Register second_reg = second.As<Register>(); - DCHECK_EQ(EAX, first.As<Register>()); - DCHECK_EQ(is_div ? EAX : EDX, out.As<Register>()); + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(EAX, first.AsRegister<Register>()); + DCHECK_EQ(is_div ? 
EAX : EDX, out.AsRegister<Register>()); SlowPathCodeX86* slow_path = - new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.As<Register>(), is_div); + new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(), + is_div); codegen_->AddSlowPath(slow_path); // 0x80000000/-1 triggers an arithmetic exception! @@ -1995,13 +2101,13 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { case Primitive::kPrimFloat: { DCHECK(first.Equals(out)); - __ divss(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { DCHECK(first.Equals(out)); - __ divsd(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } @@ -2095,7 +2201,7 @@ void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) switch (instruction->GetType()) { case Primitive::kPrimInt: { if (value.IsRegister()) { - __ testl(value.As<Register>(), value.As<Register>()); + __ testl(value.AsRegister<Register>(), value.AsRegister<Register>()); __ j(kEqual, slow_path->GetEntryLabel()); } else if (value.IsStackSlot()) { __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0)); @@ -2110,7 +2216,7 @@ void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) } case Primitive::kPrimLong: { if (value.IsRegisterPair()) { - Register temp = locations->GetTemp(0).As<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); __ movl(temp, value.AsRegisterPairLow<Register>()); __ orl(temp, value.AsRegisterPairHigh<Register>()); __ j(kEqual, slow_path->GetEntryLabel()); @@ -2127,6 +2233,139 @@ void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) } } +void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary* locations = + new 
(GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); + + switch (op->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + // The shift count needs to be in CL. + locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + // The shift count needs to be in CL. + locations->SetInAt(1, Location::RegisterLocation(ECX)); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + default: + LOG(FATAL) << "Unexpected op type " << op->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary* locations = op->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + DCHECK(first.Equals(locations->Out())); + + switch (op->GetResultType()) { + case Primitive::kPrimInt: { + Register first_reg = first.AsRegister<Register>(); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(first_reg, second_reg); + } else if (op->IsShr()) { + __ sarl(first_reg, second_reg); + } else { + __ shrl(first_reg, second_reg); + } + } else { + Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); + if (op->IsShl()) { + __ shll(first_reg, imm); + } else if (op->IsShr()) { + __ sarl(first_reg, imm); + } else { + __ shrl(first_reg, imm); + } + } + break; + } + case Primitive::kPrimLong: { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } + break; + } + default: + LOG(FATAL) << "Unexpected op type 
" << op->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { + Label done; + __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); + __ shll(loc.AsRegisterPairLow<Register>(), shifter); + __ testl(shifter, Immediate(32)); + __ j(kEqual, &done); + __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>()); + __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0)); + __ Bind(&done); +} + +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { + Label done; + __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); + __ sarl(loc.AsRegisterPairHigh<Register>(), shifter); + __ testl(shifter, Immediate(32)); + __ j(kEqual, &done); + __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>()); + __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31)); + __ Bind(&done); +} + +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { + Label done; + __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); + __ shrl(loc.AsRegisterPairHigh<Register>(), shifter); + __ testl(shifter, Immediate(32)); + __ j(kEqual, &done); + __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>()); + __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0)); + __ Bind(&done); +} + +void LocationsBuilderX86::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void InstructionCodeGeneratorX86::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void LocationsBuilderX86::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void InstructionCodeGeneratorX86::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void LocationsBuilderX86::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + +void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + void 
LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); @@ -2200,11 +2439,11 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* not_) { DCHECK(in.Equals(out)); switch (not_->InputAt(0)->GetType()) { case Primitive::kPrimBoolean: - __ xorl(out.As<Register>(), Immediate(1)); + __ xorl(out.AsRegister<Register>(), Immediate(1)); break; case Primitive::kPrimInt: - __ notl(out.As<Register>()); + __ notl(out.AsRegister<Register>()); break; case Primitive::kPrimLong: @@ -2220,20 +2459,36 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* not_) { void LocationsBuilderX86::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + // TODO: we set any here but we don't handle constants + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + } + default: + LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); + } } void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { LocationSummary* locations = compare->GetLocations(); + Register out = locations->Out().AsRegister<Register>(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + + Label less, greater, done; switch 
(compare->InputAt(0)->GetType()) { case Primitive::kPrimLong: { - Label less, greater, done; - Register output = locations->Out().As<Register>(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - if (right.IsRegister()) { + if (right.IsRegisterPair()) { __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>()); } else { DCHECK(right.IsDoubleStackSlot()); @@ -2248,23 +2503,33 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { DCHECK(right.IsDoubleStackSlot()); __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex())); } - __ movl(output, Immediate(0)); - __ j(kEqual, &done); - __ j(kBelow, &less); // Unsigned compare. - - __ Bind(&greater); - __ movl(output, Immediate(1)); - __ jmp(&done); - - __ Bind(&less); - __ movl(output, Immediate(-1)); - - __ Bind(&done); + break; + } + case Primitive::kPrimFloat: { + __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + __ j(kUnordered, compare->IsGtBias() ? &greater : &less); + break; + } + case Primitive::kPrimDouble: { + __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + __ j(kUnordered, compare->IsGtBias() ? &greater : &less); break; } default: - LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); } + __ movl(out, Immediate(0)); + __ j(kEqual, &done); + __ j(kBelow, &less); // kBelow is for CF (unsigned & floats). 
+ + __ Bind(&greater); + __ movl(out, Immediate(1)); + __ jmp(&done); + + __ Bind(&less); + __ movl(out, Immediate(-1)); + + __ Bind(&done); } void LocationsBuilderX86::VisitPhi(HPhi* instruction) { @@ -2309,33 +2574,33 @@ void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); uint32_t offset = instruction->GetFieldOffset().Uint32Value(); Primitive::Type field_type = instruction->GetFieldType(); switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).As<ByteRegister>(); + ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); __ movb(Address(obj, offset), value); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ movw(Address(obj, offset), value); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ movl(Address(obj, offset), value); if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).As<Register>(); - Register card = locations->GetTemp(1).As<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); codegen_->MarkGCCard(temp, card, obj, value); } break; @@ -2349,13 +2614,13 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).As<XmmRegister>(); + XmmRegister value = 
locations->InAt(1).AsFpuRegister<XmmRegister>(); __ movss(Address(obj, offset), value); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).As<XmmRegister>(); + XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); __ movsd(Address(obj, offset), value); break; } @@ -2387,37 +2652,37 @@ void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); uint32_t offset = instruction->GetFieldOffset().Uint32Value(); switch (instruction->GetType()) { case Primitive::kPrimBoolean: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movzxb(out, Address(obj, offset)); break; } case Primitive::kPrimByte: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movsxb(out, Address(obj, offset)); break; } case Primitive::kPrimShort: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movsxw(out, Address(obj, offset)); break; } case Primitive::kPrimChar: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movzxw(out, Address(obj, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movl(out, Address(obj, offset)); break; } @@ -2430,13 +2695,13 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instr } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); __ movss(out, 
Address(obj, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); __ movsd(out, Address(obj, offset)); break; } @@ -2464,7 +2729,7 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmpl(obj.As<Register>(), Immediate(0)); + __ cmpl(obj.AsRegister<Register>(), Immediate(0)); } else if (obj.IsStackSlot()) { __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0)); } else { @@ -2486,54 +2751,54 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); switch (instruction->GetType()) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { __ movzxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); } else { - __ movzxb(out, Address(obj, index.As<Register>(), TIMES_1, data_offset)); + __ movzxb(out, Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset)); } break; } case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { __ movsxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); } else { - __ movsxb(out, Address(obj, index.As<Register>(), TIMES_1, data_offset)); + __ movsxb(out, Address(obj, 
index.AsRegister<Register>(), TIMES_1, data_offset)); } break; } case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { __ movsxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); } else { - __ movsxw(out, Address(obj, index.As<Register>(), TIMES_2, data_offset)); + __ movsxw(out, Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset)); } break; } case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { __ movzxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); } else { - __ movzxw(out, Address(obj, index.As<Register>(), TIMES_2, data_offset)); + __ movzxw(out, Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset)); } break; } @@ -2541,12 +2806,12 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { __ movl(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); } else { - __ movl(out, Address(obj, index.As<Register>(), TIMES_4, data_offset)); + __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset)); } break; } @@ -2560,9 +2825,9 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize)); } else { __ 
movl(out.AsRegisterPairLow<Register>(), - Address(obj, index.As<Register>(), TIMES_8, data_offset)); + Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset)); __ movl(out.AsRegisterPairHigh<Register>(), - Address(obj, index.As<Register>(), TIMES_8, data_offset + kX86WordSize)); + Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize)); } break; } @@ -2622,7 +2887,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); @@ -2637,17 +2902,17 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; if (value.IsRegister()) { - __ movb(Address(obj, offset), value.As<ByteRegister>()); + __ movb(Address(obj, offset), value.AsRegister<ByteRegister>()); } else { __ movb(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } else { if (value.IsRegister()) { - __ movb(Address(obj, index.As<Register>(), TIMES_1, data_offset), - value.As<ByteRegister>()); + __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset), + value.AsRegister<ByteRegister>()); } else { - __ movb(Address(obj, index.As<Register>(), TIMES_1, data_offset), + __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } @@ -2660,17 +2925,17 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + 
data_offset; if (value.IsRegister()) { - __ movw(Address(obj, offset), value.As<Register>()); + __ movw(Address(obj, offset), value.AsRegister<Register>()); } else { __ movw(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } else { if (value.IsRegister()) { - __ movw(Address(obj, index.As<Register>(), TIMES_2, data_offset), - value.As<Register>()); + __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset), + value.AsRegister<Register>()); } else { - __ movw(Address(obj, index.As<Register>(), TIMES_2, data_offset), + __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } @@ -2682,9 +2947,10 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { if (!needs_runtime_call) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; if (value.IsRegister()) { - __ movl(Address(obj, offset), value.As<Register>()); + __ movl(Address(obj, offset), value.AsRegister<Register>()); } else { DCHECK(value.IsConstant()) << value; __ movl(Address(obj, offset), @@ -2693,19 +2959,19 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } else { DCHECK(index.IsRegister()) << index; if (value.IsRegister()) { - __ movl(Address(obj, index.As<Register>(), TIMES_4, data_offset), - value.As<Register>()); + __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), + value.AsRegister<Register>()); } else { DCHECK(value.IsConstant()) << value; - __ movl(Address(obj, index.As<Register>(), TIMES_4, data_offset), + __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } if 
(needs_write_barrier) { - Register temp = locations->GetTemp(0).As<Register>(); - Register card = locations->GetTemp(1).As<Register>(); - codegen_->MarkGCCard(temp, card, obj, value.As<Register>()); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, card, obj, value.AsRegister<Register>()); } } else { DCHECK_EQ(value_type, Primitive::kPrimNot); @@ -2731,16 +2997,16 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } } else { if (value.IsRegisterPair()) { - __ movl(Address(obj, index.As<Register>(), TIMES_8, data_offset), + __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset), value.AsRegisterPairLow<Register>()); - __ movl(Address(obj, index.As<Register>(), TIMES_8, data_offset + kX86WordSize), + __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), value.AsRegisterPairHigh<Register>()); } else { DCHECK(value.IsConstant()); int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); - __ movl(Address(obj, index.As<Register>(), TIMES_8, data_offset), + __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset), Immediate(Low32Bits(val))); - __ movl(Address(obj, index.As<Register>(), TIMES_8, data_offset + kX86WordSize), + __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), Immediate(High32Bits(val))); } } @@ -2767,8 +3033,8 @@ void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) { void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) { LocationSummary* locations = instruction->GetLocations(); uint32_t offset = mirror::Array::LengthOffset().Uint32Value(); - Register obj = locations->InAt(0).As<Register>(); - Register out = locations->Out().As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movl(out, 
Address(obj, offset)); } @@ -2788,8 +3054,8 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { instruction, locations->InAt(0), locations->InAt(1)); codegen_->AddSlowPath(slow_path); - Register index = locations->InAt(0).As<Register>(); - Register length = locations->InAt(1).As<Register>(); + Register index = locations->InAt(0).AsRegister<Register>(); + Register length = locations->InAt(1).AsRegister<Register>(); __ cmpl(index, length); __ j(kAboveEqual, slow_path->GetEntryLabel()); @@ -2866,14 +3132,14 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { if (source.IsRegister()) { if (destination.IsRegister()) { - __ movl(destination.As<Register>(), source.As<Register>()); + __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else { DCHECK(destination.IsStackSlot()); - __ movl(Address(ESP, destination.GetStackIndex()), source.As<Register>()); + __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>()); } } else if (source.IsStackSlot()) { if (destination.IsRegister()) { - __ movl(destination.As<Register>(), Address(ESP, source.GetStackIndex())); + __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex())); } else { DCHECK(destination.IsStackSlot()); MoveMemoryToMemory(destination.GetStackIndex(), @@ -2883,7 +3149,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { HIntConstant* instruction = source.GetConstant()->AsIntConstant(); Immediate imm(instruction->AsIntConstant()->GetValue()); if (destination.IsRegister()) { - __ movl(destination.As<Register>(), imm); + __ movl(destination.AsRegister<Register>(), imm); } else { __ movl(Address(ESP, destination.GetStackIndex()), imm); } @@ -2925,11 +3191,11 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgl(destination.As<Register>(), source.As<Register>()); + __ 
xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsRegister() && destination.IsStackSlot()) { - Exchange(source.As<Register>(), destination.GetStackIndex()); + Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { - Exchange(destination.As<Register>(), source.GetStackIndex()); + Exchange(destination.AsRegister<Register>(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { Exchange(destination.GetStackIndex(), source.GetStackIndex()); } else { @@ -2955,7 +3221,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { } void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { - Register out = cls->GetLocations()->Out().As<Register>(); + Register out = cls->GetLocations()->Out().AsRegister<Register>(); if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); @@ -2994,7 +3260,8 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( check->GetLoadClass(), check, check->GetDexPc(), true); codegen_->AddSlowPath(slow_path); - GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).As<Register>()); + GenerateClassInitializationCheck(slow_path, + check->GetLocations()->InAt(0).AsRegister<Register>()); } void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( @@ -3015,37 +3282,37 @@ void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).As<Register>(); + Register cls = locations->InAt(0).AsRegister<Register>(); uint32_t offset = instruction->GetFieldOffset().Uint32Value(); switch (instruction->GetType()) { case 
Primitive::kPrimBoolean: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movzxb(out, Address(cls, offset)); break; } case Primitive::kPrimByte: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movsxb(out, Address(cls, offset)); break; } case Primitive::kPrimShort: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movsxw(out, Address(cls, offset)); break; } case Primitive::kPrimChar: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movzxw(out, Address(cls, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); __ movl(out, Address(cls, offset)); break; } @@ -3058,13 +3325,13 @@ void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instructi } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); __ movss(out, Address(cls, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); __ movsd(out, Address(cls, offset)); break; } @@ -3102,33 +3369,33 @@ void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register cls = locations->InAt(0).As<Register>(); + Register cls = locations->InAt(0).AsRegister<Register>(); uint32_t offset = instruction->GetFieldOffset().Uint32Value(); Primitive::Type field_type = instruction->GetFieldType(); switch (field_type) { case Primitive::kPrimBoolean: case 
Primitive::kPrimByte: { - ByteRegister value = locations->InAt(1).As<ByteRegister>(); + ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>(); __ movb(Address(cls, offset), value); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ movw(Address(cls, offset), value); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - Register value = locations->InAt(1).As<Register>(); + Register value = locations->InAt(1).AsRegister<Register>(); __ movl(Address(cls, offset), value); if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - Register temp = locations->GetTemp(0).As<Register>(); - Register card = locations->GetTemp(1).As<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); codegen_->MarkGCCard(temp, card, cls, value); } break; @@ -3142,13 +3409,13 @@ void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instructi } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).As<XmmRegister>(); + XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); __ movss(Address(cls, offset), value); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).As<XmmRegister>(); + XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); __ movsd(Address(cls, offset), value); break; } @@ -3169,9 +3436,10 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); - Register out = load->GetLocations()->Out().As<Register>(); + Register out = load->GetLocations()->Out().AsRegister<Register>(); codegen_->LoadCurrentMethod(out); - __ movl(out, Address(out, mirror::ArtMethod::DexCacheStringsOffset().Int32Value())); 
+ __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value())); + __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); @@ -3186,7 +3454,7 @@ void LocationsBuilderX86::VisitLoadException(HLoadException* load) { void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) { Address address = Address::Absolute(Thread::ExceptionOffset<kX86WordSize>().Int32Value()); - __ fs()->movl(load->GetLocations()->Out().As<Register>(), address); + __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), address); __ fs()->movl(address, Immediate(0)); } @@ -3214,9 +3482,9 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); Location cls = locations->InAt(1); - Register out = locations->Out().As<Register>(); + Register out = locations->Out().AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); Label done, zero; SlowPathCodeX86* slow_path = nullptr; @@ -3228,7 +3496,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ movl(out, Address(obj, class_offset)); // Compare the class of `obj` with `cls`. 
if (cls.IsRegister()) { - __ cmpl(out, cls.As<Register>()); + __ cmpl(out, cls.AsRegister<Register>()); } else { DCHECK(cls.IsStackSlot()) << cls; __ cmpl(out, Address(ESP, cls.GetStackIndex())); @@ -3267,9 +3535,9 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).As<Register>(); + Register obj = locations->InAt(0).AsRegister<Register>(); Location cls = locations->InAt(1); - Register temp = locations->GetTemp(0).As<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86( instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); @@ -3282,7 +3550,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { // Compare the class of `obj` with `cls`. 
if (cls.IsRegister()) { - __ cmpl(temp, cls.As<Register>()); + __ cmpl(temp, cls.AsRegister<Register>()); } else { DCHECK(cls.IsStackSlot()) << cls; __ cmpl(temp, Address(ESP, cls.GetStackIndex())); @@ -3341,30 +3609,33 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr if (instruction->GetResultType() == Primitive::kPrimInt) { if (second.IsRegister()) { if (instruction->IsAnd()) { - __ andl(first.As<Register>(), second.As<Register>()); + __ andl(first.AsRegister<Register>(), second.AsRegister<Register>()); } else if (instruction->IsOr()) { - __ orl(first.As<Register>(), second.As<Register>()); + __ orl(first.AsRegister<Register>(), second.AsRegister<Register>()); } else { DCHECK(instruction->IsXor()); - __ xorl(first.As<Register>(), second.As<Register>()); + __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>()); } } else if (second.IsConstant()) { if (instruction->IsAnd()) { - __ andl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + __ andl(first.AsRegister<Register>(), + Immediate(second.GetConstant()->AsIntConstant()->GetValue())); } else if (instruction->IsOr()) { - __ orl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + __ orl(first.AsRegister<Register>(), + Immediate(second.GetConstant()->AsIntConstant()->GetValue())); } else { DCHECK(instruction->IsXor()); - __ xorl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + __ xorl(first.AsRegister<Register>(), + Immediate(second.GetConstant()->AsIntConstant()->GetValue())); } } else { if (instruction->IsAnd()) { - __ andl(first.As<Register>(), Address(ESP, second.GetStackIndex())); + __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); } else if (instruction->IsOr()) { - __ orl(first.As<Register>(), Address(ESP, second.GetStackIndex())); + __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); } else { 
DCHECK(instruction->IsXor()); - __ xorl(first.As<Register>(), Address(ESP, second.GetStackIndex())); + __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex())); } } } else { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 8252f81c7f..aed06c04df 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -25,7 +25,8 @@ namespace art { namespace x86 { -static constexpr size_t kX86WordSize = 4; +// Use a local definition to prevent copying mistakes. +static constexpr size_t kX86WordSize = kX86PointerSize; class CodeGeneratorX86; class SlowPathCodeX86; @@ -103,6 +104,7 @@ class LocationsBuilderX86 : public HGraphVisitor { private: void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); + void HandleShift(HBinaryOperation* instruction); CodeGeneratorX86* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -131,6 +133,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void HandleShift(HBinaryOperation* instruction); + void GenerateShlLong(const Location& loc, Register shifter); + void GenerateShrLong(const Location& loc, Register shifter); + void GenerateUShrLong(const Location& loc, Register shifter); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index e9c67e3e6d..4d70efcf38 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -539,37 +539,37 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { } if (destination.IsRegister()) { if (source.IsRegister()) { - __ 
movq(destination.As<CpuRegister>(), source.As<CpuRegister>()); + __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); } else if (source.IsFpuRegister()) { - __ movd(destination.As<CpuRegister>(), source.As<XmmRegister>()); + __ movd(destination.AsRegister<CpuRegister>(), source.AsFpuRegister<XmmRegister>()); } else if (source.IsStackSlot()) { - __ movl(destination.As<CpuRegister>(), + __ movl(destination.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); } else { DCHECK(source.IsDoubleStackSlot()); - __ movq(destination.As<CpuRegister>(), + __ movq(destination.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); } } else if (destination.IsFpuRegister()) { if (source.IsRegister()) { - __ movd(destination.As<XmmRegister>(), source.As<CpuRegister>()); + __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<CpuRegister>()); } else if (source.IsFpuRegister()) { - __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>()); + __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); } else if (source.IsStackSlot()) { - __ movss(destination.As<XmmRegister>(), + __ movss(destination.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); } else { DCHECK(source.IsDoubleStackSlot()); - __ movsd(destination.As<XmmRegister>(), + __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); } } else if (destination.IsStackSlot()) { if (source.IsRegister()) { __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), - source.As<CpuRegister>()); + source.AsRegister<CpuRegister>()); } else if (source.IsFpuRegister()) { __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), - source.As<XmmRegister>()); + source.AsFpuRegister<XmmRegister>()); } else { DCHECK(source.IsStackSlot()); __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -579,10 
+579,10 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { DCHECK(destination.IsDoubleStackSlot()); if (source.IsRegister()) { __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), - source.As<CpuRegister>()); + source.AsRegister<CpuRegister>()); } else if (source.IsFpuRegister()) { __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), - source.As<XmmRegister>()); + source.AsFpuRegister<XmmRegister>()); } else { DCHECK(source.IsDoubleStackSlot()); __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -604,7 +604,7 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, if (const_to_move->IsIntConstant()) { Immediate imm(const_to_move->AsIntConstant()->GetValue()); if (location.IsRegister()) { - __ movl(location.As<CpuRegister>(), imm); + __ movl(location.AsRegister<CpuRegister>(), imm); } else if (location.IsStackSlot()) { __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm); } else { @@ -614,7 +614,7 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, } else if (const_to_move->IsLongConstant()) { int64_t value = const_to_move->AsLongConstant()->GetValue(); if (location.IsRegister()) { - __ movq(location.As<CpuRegister>(), Immediate(value)); + __ movq(location.AsRegister<CpuRegister>(), Immediate(value)); } else if (location.IsDoubleStackSlot()) { __ movq(CpuRegister(TMP), Immediate(value)); __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP)); @@ -637,7 +637,8 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, case Primitive::kPrimLong: case Primitive::kPrimDouble: - Move(location, Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal()))); + Move(location, + Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal()))); break; default: @@ -741,7 +742,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { // Materialized condition, compare against 0. 
Location lhs = if_instr->GetLocations()->InAt(0); if (lhs.IsRegister()) { - __ cmpl(lhs.As<CpuRegister>(), Immediate(0)); + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(0)); } else { __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); @@ -755,12 +756,12 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { Location lhs = cond->GetLocations()->InAt(0); Location rhs = cond->GetLocations()->InAt(1); if (rhs.IsRegister()) { - __ cmpl(lhs.As<CpuRegister>(), rhs.As<CpuRegister>()); + __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); } else if (rhs.IsConstant()) { - __ cmpl(lhs.As<CpuRegister>(), + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(rhs.GetConstant()->AsIntConstant()->GetValue())); } else { - __ cmpl(lhs.As<CpuRegister>(), + __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } __ j(X86_64Condition(cond->AsCondition()->GetCondition()), @@ -831,17 +832,17 @@ void LocationsBuilderX86_64::VisitCondition(HCondition* comp) { void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { if (comp->NeedsMaterialization()) { LocationSummary* locations = comp->GetLocations(); - CpuRegister reg = locations->Out().As<CpuRegister>(); + CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); // Clear register: setcc only sets the low byte. 
__ xorq(reg, reg); if (locations->InAt(1).IsRegister()) { - __ cmpl(locations->InAt(0).As<CpuRegister>(), - locations->InAt(1).As<CpuRegister>()); + __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), + locations->InAt(1).AsRegister<CpuRegister>()); } else if (locations->InAt(1).IsConstant()) { - __ cmpl(locations->InAt(0).As<CpuRegister>(), + __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue())); } else { - __ cmpl(locations->InAt(0).As<CpuRegister>(), + __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex())); } __ setcc(X86_64Condition(comp->GetCondition()), reg); @@ -899,33 +900,61 @@ void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + } + default: + LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); + } } void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { - Label greater, done; LocationSummary* locations = compare->GetLocations(); - switch (compare->InputAt(0)->GetType()) { - 
case Primitive::kPrimLong: - __ cmpq(locations->InAt(0).As<CpuRegister>(), - locations->InAt(1).As<CpuRegister>()); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location left = locations->InAt(0); + Location right = locations->InAt(1); + + Label less, greater, done; + Primitive::Type type = compare->InputAt(0)->GetType(); + switch (type) { + case Primitive::kPrimLong: { + __ cmpq(left.AsRegister<CpuRegister>(), right.AsRegister<CpuRegister>()); break; + } + case Primitive::kPrimFloat: { + __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + __ j(kUnordered, compare->IsGtBias() ? &greater : &less); + break; + } + case Primitive::kPrimDouble: { + __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); + __ j(kUnordered, compare->IsGtBias() ? &greater : &less); + break; + } default: - LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + LOG(FATAL) << "Unexpected compare type " << type; } - - CpuRegister output = locations->Out().As<CpuRegister>(); - __ movl(output, Immediate(0)); + __ movl(out, Immediate(0)); __ j(kEqual, &done); - __ j(kGreater, &greater); + __ j(type == Primitive::kPrimLong ? 
kLess : kBelow, &less); // ucomis{s,d} sets CF (kBelow) - __ movl(output, Immediate(-1)); + __ Bind(&greater); + __ movl(out, Immediate(1)); __ jmp(&done); - __ Bind(&greater); - __ movl(output, Immediate(1)); + __ Bind(&less); + __ movl(out, Immediate(-1)); __ Bind(&done); } @@ -1019,12 +1048,12 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { case Primitive::kPrimInt: case Primitive::kPrimNot: case Primitive::kPrimLong: - DCHECK_EQ(ret->GetLocations()->InAt(0).As<CpuRegister>().AsRegister(), RAX); + DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX); break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK_EQ(ret->GetLocations()->InAt(0).As<XmmRegister>().AsFloatRegister(), + DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), XMM0); break; @@ -1097,7 +1126,7 @@ void LocationsBuilderX86_64::VisitInvokeStatic(HInvokeStatic* invoke) { } void InstructionCodeGeneratorX86_64::VisitInvokeStatic(HInvokeStatic* invoke) { - CpuRegister temp = invoke->GetLocations()->GetTemp(0).As<CpuRegister>(); + CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); // TODO: Implement all kinds of calls: // 1) boot -> boot // 2) app -> boot @@ -1113,7 +1142,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeStatic(HInvokeStatic* invoke) { __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()))); // (temp + offset_of_quick_compiled_code)() __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64PointerSize).SizeValue())); + kX86_64WordSize).SizeValue())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -1156,7 +1185,7 @@ void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - CpuRegister temp = invoke->GetLocations()->GetTemp(0).As<CpuRegister>(); + 
CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); LocationSummary* locations = invoke->GetLocations(); @@ -1167,13 +1196,13 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); __ movl(temp, Address(temp, class_offset)); } else { - __ movl(temp, Address(receiver.As<CpuRegister>(), class_offset)); + __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); } // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64PointerSize).SizeValue())); + kX86_64WordSize).SizeValue())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -1187,7 +1216,7 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - CpuRegister temp = invoke->GetLocations()->GetTemp(0).As<CpuRegister>(); + CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() + (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry); LocationSummary* locations = invoke->GetLocations(); @@ -1195,7 +1224,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo size_t class_offset = mirror::Object::ClassOffset().SizeValue(); // Set the hidden argument. 
- __ movq(invoke->GetLocations()->GetTemp(1).As<CpuRegister>(), + __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(), Immediate(invoke->GetDexMethodIndex())); // temp = object->GetClass(); @@ -1203,13 +1232,13 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); __ movl(temp, Address(temp, class_offset)); } else { - __ movl(temp, Address(receiver.As<CpuRegister>(), class_offset)); + __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); } // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64PointerSize).SizeValue())); + kX86_64WordSize).SizeValue())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -1228,9 +1257,9 @@ void LocationsBuilderX86_64::VisitNeg(HNeg* neg) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); - // Output overlaps as we need a fresh (zero-initialized) - // register to perform subtraction from zero. 
- locations->SetOut(Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); break; default: @@ -1246,49 +1275,40 @@ void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) { case Primitive::kPrimInt: DCHECK(in.IsRegister()); DCHECK(in.Equals(out)); - __ negl(out.As<CpuRegister>()); + __ negl(out.AsRegister<CpuRegister>()); break; case Primitive::kPrimLong: DCHECK(in.IsRegister()); DCHECK(in.Equals(out)); - __ negq(out.As<CpuRegister>()); + __ negq(out.AsRegister<CpuRegister>()); break; - case Primitive::kPrimFloat: - DCHECK(in.IsFpuRegister()); - DCHECK(out.IsFpuRegister()); - DCHECK(!in.Equals(out)); - // TODO: Instead of computing negation as a subtraction from - // zero, implement it with an exclusive or with value 0x80000000 - // (mask for bit 31, representing the sign of a single-precision - // floating-point number), fetched from a constant pool: - // - // xorps out, [RIP:...] // value at RIP is 0x80 00 00 00 - - // out = 0 - __ xorps(out.As<XmmRegister>(), out.As<XmmRegister>()); - // out = out - in - __ subss(out.As<XmmRegister>(), in.As<XmmRegister>()); + case Primitive::kPrimFloat: { + DCHECK(in.Equals(out)); + CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>(); + XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + // Implement float negation with an exclusive or with value + // 0x80000000 (mask for bit 31, representing the sign of a + // single-precision floating-point number). 
+ __ movq(constant, Immediate(INT64_C(0x80000000))); + __ movd(mask, constant); + __ xorps(out.AsFpuRegister<XmmRegister>(), mask); break; + } - case Primitive::kPrimDouble: - DCHECK(in.IsFpuRegister()); - DCHECK(out.IsFpuRegister()); - DCHECK(!in.Equals(out)); - // TODO: Instead of computing negation as a subtraction from - // zero, implement it with an exclusive or with value + case Primitive::kPrimDouble: { + DCHECK(in.Equals(out)); + CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>(); + XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + // Implement double negation with an exclusive or with value // 0x8000000000000000 (mask for bit 63, representing the sign of - // a double-precision floating-point number), fetched from a - // constant pool: - // - // xorpd out, [RIP:...] // value at RIP is 0x80 00 00 00 00 00 00 00 - - // out = 0 - __ xorpd(out.As<XmmRegister>(), out.As<XmmRegister>()); - // out = out - in - __ subsd(out.As<XmmRegister>(), in.As<XmmRegister>()); + // a double-precision floating-point number). + __ movq(constant, Immediate(INT64_C(0x8000000000000000))); + __ movd(mask, constant); + __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); break; + } default: LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); @@ -1300,6 +1320,7 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); + DCHECK_NE(result_type, input_type); switch (result_type) { case Primitive::kPrimByte: switch (input_type) { @@ -1383,7 +1404,6 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: - case Primitive::kPrimChar: // Processing a Dex `int-to-char' instruction. 
locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -1407,6 +1427,11 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimLong: + // Processing a Dex `long-to-float' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimDouble: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1430,6 +1455,11 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { break; case Primitive::kPrimLong: + // Processing a Dex `long-to-double' instruction. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1453,6 +1483,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver Location in = locations->InAt(0); Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); + DCHECK_NE(result_type, input_type); switch (result_type) { case Primitive::kPrimByte: switch (input_type) { @@ -1461,13 +1492,13 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimChar: // Processing a Dex `int-to-byte' instruction. 
if (in.IsRegister()) { - __ movsxb(out.As<CpuRegister>(), in.As<CpuRegister>()); + __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); } else if (in.IsStackSlot()) { - __ movsxb(out.As<CpuRegister>(), + __ movsxb(out.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsIntConstant()); - __ movl(out.As<CpuRegister>(), + __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int8_t>(in.GetConstant()->AsIntConstant()->GetValue()))); } break; @@ -1485,13 +1516,13 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimChar: // Processing a Dex `int-to-short' instruction. if (in.IsRegister()) { - __ movsxw(out.As<CpuRegister>(), in.As<CpuRegister>()); + __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); } else if (in.IsStackSlot()) { - __ movsxw(out.As<CpuRegister>(), + __ movsxw(out.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsIntConstant()); - __ movl(out.As<CpuRegister>(), + __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int16_t>(in.GetConstant()->AsIntConstant()->GetValue()))); } break; @@ -1507,15 +1538,15 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimLong: // Processing a Dex `long-to-int' instruction. 
if (in.IsRegister()) { - __ movl(out.As<CpuRegister>(), in.As<CpuRegister>()); + __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); } else if (in.IsDoubleStackSlot()) { - __ movl(out.As<CpuRegister>(), + __ movl(out.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), in.GetStackIndex())); } else { DCHECK(in.IsConstant()); DCHECK(in.GetConstant()->IsLongConstant()); int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); - __ movl(out.As<CpuRegister>(), Immediate(static_cast<int32_t>(value))); + __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); } break; @@ -1540,7 +1571,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimChar: // Processing a Dex `int-to-long' instruction. DCHECK(in.IsRegister()); - __ movsxd(out.As<CpuRegister>(), in.As<CpuRegister>()); + __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); break; case Primitive::kPrimFloat: @@ -1560,16 +1591,15 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: - case Primitive::kPrimChar: // Processing a Dex `int-to-char' instruction. if (in.IsRegister()) { - __ movzxw(out.As<CpuRegister>(), in.As<CpuRegister>()); + __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); } else if (in.IsStackSlot()) { - __ movzxw(out.As<CpuRegister>(), + __ movzxw(out.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsIntConstant()); - __ movl(out.As<CpuRegister>(), + __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue()))); } break; @@ -1582,15 +1612,19 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimFloat: switch (input_type) { - // Processing a Dex `int-to-float' instruction. 
case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimChar: - __ cvtsi2ss(out.As<XmmRegister>(), in.As<CpuRegister>()); + // Processing a Dex `int-to-float' instruction. + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); break; case Primitive::kPrimLong: + // Processing a Dex `long-to-float' instruction. + __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + break; + case Primitive::kPrimDouble: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1604,15 +1638,19 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver case Primitive::kPrimDouble: switch (input_type) { - // Processing a Dex `int-to-double' instruction. case Primitive::kPrimByte: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimChar: - __ cvtsi2sd(out.As<XmmRegister>(), in.As<CpuRegister>()); + // Processing a Dex `int-to-double' instruction. + __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); break; case Primitive::kPrimLong: + // Processing a Dex `long-to-double' instruction. 
+ __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); + break; + case Primitive::kPrimFloat: LOG(FATAL) << "Type conversion from " << input_type << " to " << result_type << " not yet implemented"; @@ -1670,28 +1708,28 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { switch (add->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ addl(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ addl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } else if (second.IsConstant()) { Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ addl(first.As<CpuRegister>(), imm); + __ addl(first.AsRegister<CpuRegister>(), imm); } else { - __ addl(first.As<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); + __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); } break; } case Primitive::kPrimLong: { - __ addq(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ addq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); break; } case Primitive::kPrimFloat: { - __ addss(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } @@ -1736,27 +1774,27 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { switch (sub->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ subl(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } else if (second.IsConstant()) { Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ subl(first.As<CpuRegister>(), imm); + __ subl(first.AsRegister<CpuRegister>(), imm); } else { - __ 
subl(first.As<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); + __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); } break; } case Primitive::kPrimLong: { - __ subq(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); break; } case Primitive::kPrimFloat: { - __ subss(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - __ subsd(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } @@ -1802,28 +1840,29 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { switch (mul->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ imull(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } else if (second.IsConstant()) { Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ imull(first.As<CpuRegister>(), imm); + __ imull(first.AsRegister<CpuRegister>(), imm); } else { DCHECK(second.IsStackSlot()); - __ imull(first.As<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); + __ imull(first.AsRegister<CpuRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); } break; } case Primitive::kPrimLong: { - __ imulq(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); break; } case Primitive::kPrimFloat: { - __ mulss(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - __ mulsd(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ mulsd(first.AsFpuRegister<XmmRegister>(), 
second.AsFpuRegister<XmmRegister>()); break; } @@ -1840,10 +1879,10 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in bool is_div = instruction->IsDiv(); LocationSummary* locations = instruction->GetLocations(); - CpuRegister out_reg = locations->Out().As<CpuRegister>(); - CpuRegister second_reg = locations->InAt(1).As<CpuRegister>(); + CpuRegister out_reg = locations->Out().AsRegister<CpuRegister>(); + CpuRegister second_reg = locations->InAt(1).AsRegister<CpuRegister>(); - DCHECK_EQ(RAX, locations->InAt(0).As<CpuRegister>().AsRegister()); + DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister()); DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister()); SlowPathCodeX86_64* slow_path = @@ -1915,12 +1954,12 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { } case Primitive::kPrimFloat: { - __ divss(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - __ divsd(first.As<XmmRegister>(), second.As<XmmRegister>()); + __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); break; } @@ -1993,7 +2032,7 @@ void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instructio switch (instruction->GetType()) { case Primitive::kPrimInt: { if (value.IsRegister()) { - __ testl(value.As<CpuRegister>(), value.As<CpuRegister>()); + __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); __ j(kEqual, slow_path->GetEntryLabel()); } else if (value.IsStackSlot()) { __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0)); @@ -2008,7 +2047,7 @@ void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instructio } case Primitive::kPrimLong: { if (value.IsRegister()) { - __ testq(value.As<CpuRegister>(), value.As<CpuRegister>()); + __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); __ j(kEqual, 
slow_path->GetEntryLabel()); } else if (value.IsDoubleStackSlot()) { __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0)); @@ -2026,6 +2065,107 @@ void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instructio } } +void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); + + switch (op->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + // The shift count needs to be in CL. + locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1))); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) { + DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); + + LocationSummary* locations = op->GetLocations(); + CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>(); + Location second = locations->InAt(1); + + switch (op->GetResultType()) { + case Primitive::kPrimInt: { + if (second.IsRegister()) { + CpuRegister second_reg = second.AsRegister<CpuRegister>(); + if (op->IsShl()) { + __ shll(first_reg, second_reg); + } else if (op->IsShr()) { + __ sarl(first_reg, second_reg); + } else { + __ shrl(first_reg, second_reg); + } + } else { + Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); + if (op->IsShl()) { + __ shll(first_reg, imm); + } else if (op->IsShr()) { + __ sarl(first_reg, imm); + } else { + __ shrl(first_reg, imm); + } + } + break; + } + case Primitive::kPrimLong: { + if (second.IsRegister()) { + CpuRegister second_reg = second.AsRegister<CpuRegister>(); + if (op->IsShl()) { + __ shlq(first_reg, second_reg); + } else if (op->IsShr()) { + __ sarq(first_reg, second_reg); + 
} else { + __ shrq(first_reg, second_reg); + } + } else { + Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); + if (op->IsShl()) { + __ shlq(first_reg, imm); + } else if (op->IsShr()) { + __ sarq(first_reg, imm); + } else { + __ shrq(first_reg, imm); + } + } + break; + } + default: + LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); + } +} + +void LocationsBuilderX86_64::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) { + HandleShift(shl); +} + +void LocationsBuilderX86_64::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) { + HandleShift(shr); +} + +void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + +void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) { + HandleShift(ushr); +} + void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); @@ -2095,20 +2235,20 @@ void LocationsBuilderX86_64::VisitNot(HNot* not_) { void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) { LocationSummary* locations = not_->GetLocations(); - DCHECK_EQ(locations->InAt(0).As<CpuRegister>().AsRegister(), - locations->Out().As<CpuRegister>().AsRegister()); + DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(), + locations->Out().AsRegister<CpuRegister>().AsRegister()); Location out = locations->Out(); switch (not_->InputAt(0)->GetType()) { case Primitive::kPrimBoolean: - __ xorq(out.As<CpuRegister>(), Immediate(1)); + __ xorq(out.AsRegister<CpuRegister>(), Immediate(1)); break; case Primitive::kPrimInt: - __ notl(out.As<CpuRegister>()); + __ notl(out.AsRegister<CpuRegister>()); break; case Primitive::kPrimLong: - __ notq(out.As<CpuRegister>()); + __ notq(out.AsRegister<CpuRegister>()); break; default: @@ -2147,51 +2287,51 @@ void 
LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instructio void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).As<CpuRegister>(); + CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); size_t offset = instruction->GetFieldOffset().SizeValue(); Primitive::Type field_type = instruction->GetFieldType(); switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).As<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); __ movb(Address(obj, offset), value); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).As<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); __ movw(Address(obj, offset), value); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister value = locations->InAt(1).As<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); __ movl(Address(obj, offset), value); if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).As<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).As<CpuRegister>(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); codegen_->MarkGCCard(temp, card, obj, value); } break; } case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).As<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); __ movq(Address(obj, offset), value); break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).As<XmmRegister>(); + XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); __ movss(Address(obj, offset), value); break; } case 
Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).As<XmmRegister>(); + XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); __ movsd(Address(obj, offset), value); break; } @@ -2211,55 +2351,55 @@ void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instructio void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).As<CpuRegister>(); + CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); size_t offset = instruction->GetFieldOffset().SizeValue(); switch (instruction->GetType()) { case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movzxb(out, Address(obj, offset)); break; } case Primitive::kPrimByte: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movsxb(out, Address(obj, offset)); break; } case Primitive::kPrimShort: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movsxw(out, Address(obj, offset)); break; } case Primitive::kPrimChar: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movzxw(out, Address(obj, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movl(out, Address(obj, offset)); break; } case Primitive::kPrimLong: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movq(out, Address(obj, offset)); break; } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = 
locations->Out().AsFpuRegister<XmmRegister>(); __ movss(out, Address(obj, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); __ movsd(out, Address(obj, offset)); break; } @@ -2287,7 +2427,7 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmpl(obj.As<CpuRegister>(), Immediate(0)); + __ cmpl(obj.AsRegister<CpuRegister>(), Immediate(0)); } else if (obj.IsStackSlot()) { __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0)); } else { @@ -2310,54 +2450,54 @@ void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).As<CpuRegister>(); + CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); Location index = locations->InAt(1); switch (instruction->GetType()) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movzxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset)); } else { - __ movzxb(out, Address(obj, index.As<CpuRegister>(), TIMES_1, data_offset)); + __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset)); } break; } case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movsxb(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + 
data_offset)); } else { - __ movsxb(out, Address(obj, index.As<CpuRegister>(), TIMES_1, data_offset)); + __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset)); } break; } case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movsxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); } else { - __ movsxw(out, Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset)); + __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset)); } break; } case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movzxw(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset)); } else { - __ movzxw(out, Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset)); + __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset)); } break; } @@ -2366,48 +2506,48 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimNot: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movl(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); } else { - __ movl(out, Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset)); + __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, 
data_offset)); } break; } case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); if (index.IsConstant()) { __ movq(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); } else { - __ movq(out, Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset)); + __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset)); } break; } case Primitive::kPrimFloat: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movss(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset)); } else { - __ movss(out, Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset)); + __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset)); } break; } case Primitive::kPrimDouble: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); if (index.IsConstant()) { __ movsd(out, Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset)); } else { - __ movsd(out, Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset)); + __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset)); } break; } @@ -2455,7 +2595,7 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).As<CpuRegister>(); + CpuRegister obj = 
locations->InAt(0).AsRegister<CpuRegister>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); @@ -2470,16 +2610,17 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; if (value.IsRegister()) { - __ movb(Address(obj, offset), value.As<CpuRegister>()); + __ movb(Address(obj, offset), value.AsRegister<CpuRegister>()); } else { - __ movb(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + __ movb(Address(obj, offset), + Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } else { if (value.IsRegister()) { - __ movb(Address(obj, index.As<CpuRegister>(), TIMES_1, data_offset), - value.As<CpuRegister>()); + __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset), + value.AsRegister<CpuRegister>()); } else { - __ movb(Address(obj, index.As<CpuRegister>(), TIMES_1, data_offset), + __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } @@ -2492,19 +2633,20 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; if (value.IsRegister()) { - __ movw(Address(obj, offset), value.As<CpuRegister>()); + __ movw(Address(obj, offset), value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsConstant()) << value; - __ movw(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + __ movw(Address(obj, offset), + Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } else { DCHECK(index.IsRegister()) << index; if (value.IsRegister()) { - __ movw(Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset), - 
value.As<CpuRegister>()); + __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset), + value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsConstant()) << value; - __ movw(Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset), + __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } @@ -2519,7 +2661,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; if (value.IsRegister()) { - __ movl(Address(obj, offset), value.As<CpuRegister>()); + __ movl(Address(obj, offset), value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsConstant()) << value; __ movl(Address(obj, offset), @@ -2528,24 +2670,25 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { } else { DCHECK(index.IsRegister()) << index; if (value.IsRegister()) { - __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset), - value.As<CpuRegister>()); + __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), + value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsConstant()) << value; - __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset), + __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } } if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); - CpuRegister temp = locations->GetTemp(0).As<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).As<CpuRegister>(); - codegen_->MarkGCCard(temp, card, obj, value.As<CpuRegister>()); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard(temp, card, obj, value.AsRegister<CpuRegister>()); } } else { DCHECK_EQ(value_type, Primitive::kPrimNot); 
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAputObject), true)); + __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAputObject), + true)); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } @@ -2557,11 +2700,11 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; DCHECK(value.IsRegister()); - __ movq(Address(obj, offset), value.As<CpuRegister>()); + __ movq(Address(obj, offset), value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsRegister()); - __ movq(Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset), - value.As<CpuRegister>()); + __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), + value.AsRegister<CpuRegister>()); } break; } @@ -2571,11 +2714,11 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; DCHECK(value.IsFpuRegister()); - __ movss(Address(obj, offset), value.As<XmmRegister>()); + __ movss(Address(obj, offset), value.AsFpuRegister<XmmRegister>()); } else { DCHECK(value.IsFpuRegister()); - __ movss(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset), - value.As<XmmRegister>()); + __ movss(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), + value.AsFpuRegister<XmmRegister>()); } break; } @@ -2585,11 +2728,11 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; DCHECK(value.IsFpuRegister()); - __ movsd(Address(obj, offset), value.As<XmmRegister>()); + __ movsd(Address(obj, offset), value.AsFpuRegister<XmmRegister>()); } else { DCHECK(value.IsFpuRegister()); 
- __ movsd(Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset), - value.As<XmmRegister>()); + __ movsd(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), + value.AsFpuRegister<XmmRegister>()); } break; } @@ -2610,8 +2753,8 @@ void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) { void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) { LocationSummary* locations = instruction->GetLocations(); uint32_t offset = mirror::Array::LengthOffset().Uint32Value(); - CpuRegister obj = locations->InAt(0).As<CpuRegister>(); - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movl(out, Address(obj, offset)); } @@ -2631,8 +2774,8 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) instruction, locations->InAt(0), locations->InAt(1)); codegen_->AddSlowPath(slow_path); - CpuRegister index = locations->InAt(0).As<CpuRegister>(); - CpuRegister length = locations->InAt(1).As<CpuRegister>(); + CpuRegister index = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister length = locations->InAt(1).AsRegister<CpuRegister>(); __ cmpl(index, length); __ j(kAboveEqual, slow_path->GetEntryLabel()); @@ -2716,21 +2859,21 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (source.IsRegister()) { if (destination.IsRegister()) { - __ movq(destination.As<CpuRegister>(), source.As<CpuRegister>()); + __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); } else if (destination.IsStackSlot()) { __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), - source.As<CpuRegister>()); + source.AsRegister<CpuRegister>()); } else { DCHECK(destination.IsDoubleStackSlot()); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), - source.As<CpuRegister>()); + source.AsRegister<CpuRegister>()); } } else if 
(source.IsStackSlot()) { if (destination.IsRegister()) { - __ movl(destination.As<CpuRegister>(), + __ movl(destination.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); } else if (destination.IsFpuRegister()) { - __ movss(destination.As<XmmRegister>(), + __ movss(destination.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); } else { DCHECK(destination.IsStackSlot()); @@ -2739,10 +2882,11 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } } else if (source.IsDoubleStackSlot()) { if (destination.IsRegister()) { - __ movq(destination.As<CpuRegister>(), + __ movq(destination.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); } else if (destination.IsFpuRegister()) { - __ movsd(destination.As<XmmRegister>(), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movsd(destination.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), source.GetStackIndex())); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); @@ -2753,7 +2897,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (constant->IsIntConstant()) { Immediate imm(constant->AsIntConstant()->GetValue()); if (destination.IsRegister()) { - __ movl(destination.As<CpuRegister>(), imm); + __ movl(destination.AsRegister<CpuRegister>(), imm); } else { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); @@ -2761,7 +2905,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); if (destination.IsRegister()) { - __ movq(destination.As<CpuRegister>(), Immediate(value)); + __ movq(destination.AsRegister<CpuRegister>(), Immediate(value)); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; __ movq(CpuRegister(TMP), Immediate(value)); @@ 
-2771,7 +2915,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue())); if (destination.IsFpuRegister()) { __ movl(CpuRegister(TMP), imm); - __ movd(destination.As<XmmRegister>(), CpuRegister(TMP)); + __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP)); } else { DCHECK(destination.IsStackSlot()) << destination; __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); @@ -2781,7 +2925,7 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue())); if (destination.IsFpuRegister()) { __ movq(CpuRegister(TMP), imm); - __ movd(destination.As<XmmRegister>(), CpuRegister(TMP)); + __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP)); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; __ movq(CpuRegister(TMP), imm); @@ -2790,14 +2934,14 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } } else if (source.IsFpuRegister()) { if (destination.IsFpuRegister()) { - __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>()); + __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); } else if (destination.IsStackSlot()) { __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), - source.As<XmmRegister>()); + source.AsFpuRegister<XmmRegister>()); } else { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), - source.As<XmmRegister>()); + source.AsFpuRegister<XmmRegister>()); } } } @@ -2858,31 +3002,31 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgq(destination.As<CpuRegister>(), source.As<CpuRegister>()); + __ xchgq(destination.AsRegister<CpuRegister>(), 
source.AsRegister<CpuRegister>()); } else if (source.IsRegister() && destination.IsStackSlot()) { - Exchange32(source.As<CpuRegister>(), destination.GetStackIndex()); + Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { - Exchange32(destination.As<CpuRegister>(), source.GetStackIndex()); + Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { Exchange32(destination.GetStackIndex(), source.GetStackIndex()); } else if (source.IsRegister() && destination.IsDoubleStackSlot()) { - Exchange64(source.As<CpuRegister>(), destination.GetStackIndex()); + Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsRegister()) { - Exchange64(destination.As<CpuRegister>(), source.GetStackIndex()); + Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { Exchange64(destination.GetStackIndex(), source.GetStackIndex()); } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { - __ movd(CpuRegister(TMP), source.As<XmmRegister>()); - __ movaps(source.As<XmmRegister>(), destination.As<XmmRegister>()); - __ movd(destination.As<XmmRegister>(), CpuRegister(TMP)); + __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>()); + __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>()); + __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP)); } else if (source.IsFpuRegister() && destination.IsStackSlot()) { - Exchange32(source.As<XmmRegister>(), destination.GetStackIndex()); + Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsFpuRegister()) { - Exchange32(destination.As<XmmRegister>(), source.GetStackIndex()); + 
Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { - Exchange64(source.As<XmmRegister>(), destination.GetStackIndex()); + Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) { - Exchange64(destination.As<XmmRegister>(), source.GetStackIndex()); + Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); } else { LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination; } @@ -2917,7 +3061,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { } void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { - CpuRegister out = cls->GetLocations()->Out().As<CpuRegister>(); + CpuRegister out = cls->GetLocations()->Out().AsRegister<CpuRegister>(); if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); @@ -2955,7 +3099,8 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( check->GetLoadClass(), check, check->GetDexPc(), true); codegen_->AddSlowPath(slow_path); - GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).As<CpuRegister>()); + GenerateClassInitializationCheck(slow_path, + check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); } void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { @@ -2967,55 +3112,55 @@ void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).As<CpuRegister>(); + CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); size_t offset = 
instruction->GetFieldOffset().SizeValue(); switch (instruction->GetType()) { case Primitive::kPrimBoolean: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movzxb(out, Address(cls, offset)); break; } case Primitive::kPrimByte: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movsxb(out, Address(cls, offset)); break; } case Primitive::kPrimShort: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movsxw(out, Address(cls, offset)); break; } case Primitive::kPrimChar: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movzxw(out, Address(cls, offset)); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movl(out, Address(cls, offset)); break; } case Primitive::kPrimLong: { - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); __ movq(out, Address(cls, offset)); break; } case Primitive::kPrimFloat: { - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); __ movss(out, Address(cls, offset)); break; } case Primitive::kPrimDouble: { - XmmRegister out = locations->Out().As<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); __ movsd(out, Address(cls, offset)); break; } @@ -3043,51 +3188,51 @@ void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister cls = locations->InAt(0).As<CpuRegister>(); + 
CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>(); size_t offset = instruction->GetFieldOffset().SizeValue(); Primitive::Type field_type = instruction->GetFieldType(); switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - CpuRegister value = locations->InAt(1).As<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); __ movb(Address(cls, offset), value); break; } case Primitive::kPrimShort: case Primitive::kPrimChar: { - CpuRegister value = locations->InAt(1).As<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); __ movw(Address(cls, offset), value); break; } case Primitive::kPrimInt: case Primitive::kPrimNot: { - CpuRegister value = locations->InAt(1).As<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); __ movl(Address(cls, offset), value); if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) { - CpuRegister temp = locations->GetTemp(0).As<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).As<CpuRegister>(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); codegen_->MarkGCCard(temp, card, cls, value); } break; } case Primitive::kPrimLong: { - CpuRegister value = locations->InAt(1).As<CpuRegister>(); + CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>(); __ movq(Address(cls, offset), value); break; } case Primitive::kPrimFloat: { - XmmRegister value = locations->InAt(1).As<XmmRegister>(); + XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); __ movss(Address(cls, offset), value); break; } case Primitive::kPrimDouble: { - XmmRegister value = locations->InAt(1).As<XmmRegister>(); + XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>(); __ movsd(Address(cls, offset), value); break; } @@ -3108,9 +3253,10 @@ void 
InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); - CpuRegister out = load->GetLocations()->Out().As<CpuRegister>(); + CpuRegister out = load->GetLocations()->Out().AsRegister<CpuRegister>(); codegen_->LoadCurrentMethod(CpuRegister(out)); - __ movl(out, Address(out, mirror::ArtMethod::DexCacheStringsOffset().Int32Value())); + __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value())); + __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); @@ -3126,7 +3272,7 @@ void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) { Address address = Address::Absolute( Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true); - __ gs()->movl(load->GetLocations()->Out().As<CpuRegister>(), address); + __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), address); __ gs()->movl(address, Immediate(0)); } @@ -3155,9 +3301,9 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).As<CpuRegister>(); + CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); Location cls = locations->InAt(1); - CpuRegister out = locations->Out().As<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); Label done, zero; SlowPathCodeX86_64* slow_path = nullptr; @@ -3169,7 +3315,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { 
// Compare the class of `obj` with `cls`. __ movl(out, Address(obj, class_offset)); if (cls.IsRegister()) { - __ cmpl(out, cls.As<CpuRegister>()); + __ cmpl(out, cls.AsRegister<CpuRegister>()); } else { DCHECK(cls.IsStackSlot()) << cls; __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); @@ -3207,9 +3353,9 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).As<CpuRegister>(); + CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); Location cls = locations->InAt(1); - CpuRegister temp = locations->GetTemp(0).As<CpuRegister>(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64( instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); @@ -3221,7 +3367,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // Compare the class of `obj` with `cls`. 
__ movl(temp, Address(obj, class_offset)); if (cls.IsRegister()) { - __ cmpl(temp, cls.As<CpuRegister>()); + __ cmpl(temp, cls.AsRegister<CpuRegister>()); } else { DCHECK(cls.IsStackSlot()) << cls; __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); @@ -3286,43 +3432,43 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in if (instruction->GetResultType() == Primitive::kPrimInt) { if (second.IsRegister()) { if (instruction->IsAnd()) { - __ andl(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } else if (instruction->IsOr()) { - __ orl(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } else { DCHECK(instruction->IsXor()); - __ xorl(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } } else if (second.IsConstant()) { Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); if (instruction->IsAnd()) { - __ andl(first.As<CpuRegister>(), imm); + __ andl(first.AsRegister<CpuRegister>(), imm); } else if (instruction->IsOr()) { - __ orl(first.As<CpuRegister>(), imm); + __ orl(first.AsRegister<CpuRegister>(), imm); } else { DCHECK(instruction->IsXor()); - __ xorl(first.As<CpuRegister>(), imm); + __ xorl(first.AsRegister<CpuRegister>(), imm); } } else { Address address(CpuRegister(RSP), second.GetStackIndex()); if (instruction->IsAnd()) { - __ andl(first.As<CpuRegister>(), address); + __ andl(first.AsRegister<CpuRegister>(), address); } else if (instruction->IsOr()) { - __ orl(first.As<CpuRegister>(), address); + __ orl(first.AsRegister<CpuRegister>(), address); } else { DCHECK(instruction->IsXor()); - __ xorl(first.As<CpuRegister>(), address); + __ xorl(first.AsRegister<CpuRegister>(), address); } } } else { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); if 
(instruction->IsAnd()) { - __ andq(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ andq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } else if (instruction->IsOr()) { - __ orq(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ orq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } else { DCHECK(instruction->IsXor()); - __ xorq(first.As<CpuRegister>(), second.As<CpuRegister>()); + __ xorq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } } } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 86f3b4ebf7..794b81ffbc 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -25,7 +25,8 @@ namespace art { namespace x86_64 { -static constexpr size_t kX86_64WordSize = 8; +// Use a local definition to prevent copying mistakes. +static constexpr size_t kX86_64WordSize = kX86_64PointerSize; static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 }; static constexpr FloatRegister kParameterFloatRegisters[] = @@ -107,6 +108,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor { private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); + void HandleShift(HBinaryOperation* operation); CodeGeneratorX86_64* const codegen_; InvokeDexCallingConventionVisitor parameter_visitor_; @@ -135,6 +137,7 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void HandleShift(HBinaryOperation* operation); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h index d2acfa6973..ac00824e33 100644 --- 
a/compiler/optimizing/constant_folding.h +++ b/compiler/optimizing/constant_folding.h @@ -32,10 +32,10 @@ namespace art { */ class HConstantFolding : public HOptimization { public: - HConstantFolding(HGraph* graph, const HGraphVisualizer& visualizer) - : HOptimization(graph, true, kConstantFoldingPassName, visualizer) {} + explicit HConstantFolding(HGraph* graph) + : HOptimization(graph, true, kConstantFoldingPassName) {} - virtual void Run() OVERRIDE; + void Run() OVERRIDE; static constexpr const char* kConstantFoldingPassName = "constant_folding"; diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 856c5165a3..a56b9d9a12 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -47,8 +47,7 @@ static void TestCode(const uint16_t* data, ASSERT_EQ(expected_before, actual_before); x86::CodeGeneratorX86 codegen(graph); - HGraphVisualizer visualizer(nullptr, graph, codegen, ""); - HConstantFolding(graph, visualizer).Run(); + HConstantFolding(graph).Run(); SSAChecker ssa_checker(&allocator, graph); ssa_checker.Run(); ASSERT_TRUE(ssa_checker.IsValid()); @@ -60,7 +59,7 @@ static void TestCode(const uint16_t* data, check_after_cf(graph); - HDeadCodeElimination(graph, visualizer).Run(); + HDeadCodeElimination(graph).Run(); ssa_checker.Run(); ASSERT_TRUE(ssa_checker.IsValid()); diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index a4446ae04d..3db2c3ff3f 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -28,10 +28,10 @@ namespace art { */ class HDeadCodeElimination : public HOptimization { public: - HDeadCodeElimination(HGraph* graph, const HGraphVisualizer& visualizer) - : HOptimization(graph, true, kDeadCodeEliminationPassName, visualizer) {} + explicit HDeadCodeElimination(HGraph* graph) + : HOptimization(graph, true, kDeadCodeEliminationPassName) 
{} - virtual void Run() OVERRIDE; + void Run() OVERRIDE; static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination"; diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 0c6807482a..5d4b9cb024 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -41,8 +41,7 @@ static void TestCode(const uint16_t* data, ASSERT_EQ(actual_before, expected_before); x86::CodeGeneratorX86 codegen(graph); - HGraphVisualizer visualizer(nullptr, graph, codegen, ""); - HDeadCodeElimination(graph, visualizer).Run(); + HDeadCodeElimination(graph).Run(); SSAChecker ssa_checker(&allocator, graph); ssa_checker.Run(); ASSERT_TRUE(ssa_checker.IsValid()); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 1953241a2a..5d712feb2b 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -342,4 +342,72 @@ void SSAChecker::VisitPhi(HPhi* phi) { } } +static Primitive::Type PrimitiveKind(Primitive::Type type) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + return Primitive::kPrimInt; + default: + return type; + } +} + +void SSAChecker::VisitCondition(HCondition* op) { + VisitInstruction(op); + // TODO: check inputs types, and special case the `null` check. 
+ if (op->GetType() != Primitive::kPrimBoolean) { + std::stringstream error; + error << "Condition " << op->DebugName() << " " << op->GetId() + << " has a non-boolean result type: " + << op->GetType() << "."; + errors_.push_back(error.str()); + } +} + +void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { + VisitInstruction(op); + if (op->IsUShr() || op->IsShr() || op->IsShl()) { + if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) { + std::stringstream error; + error << "Shift operation " << op->DebugName() << " " << op->GetId() + << " has a non-int kind second input: " + << op->InputAt(1)->DebugName() << " of type " << op->InputAt(1)->GetType() + << "."; + errors_.push_back(error.str()); + } + } else { + if (PrimitiveKind(op->InputAt(1)->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) { + std::stringstream error; + error << "Binary operation " << op->DebugName() << " " << op->GetId() + << " has inputs of different type: " + << op->InputAt(0)->GetType() << ", and " << op->InputAt(1)->GetType() + << "."; + errors_.push_back(error.str()); + } + } + + if (op->IsCompare()) { + if (op->GetType() != Primitive::kPrimInt) { + std::stringstream error; + error << "Compare operation " << op->GetId() + << " has a non-int result type: " + << op->GetType() << "."; + errors_.push_back(error.str()); + } + } else { + // Use the first input, so that we can also make this check for shift operations. 
+ if (PrimitiveKind(op->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) { + std::stringstream error; + error << "Binary operation " << op->DebugName() << " " << op->GetId() + << " has a result type different than its input type: " + << op->GetType() << ", and " << op->InputAt(0)->GetType() + << "."; + errors_.push_back(error.str()); + } + } +} + } // namespace art diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 8ba8cb16b1..b6c9f1720c 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -24,11 +24,11 @@ namespace art { // A control-flow graph visitor performing various checks. -class GraphChecker : public HGraphVisitor { +class GraphChecker : public HGraphDelegateVisitor { public: GraphChecker(ArenaAllocator* allocator, HGraph* graph, const char* dump_prefix = "art::GraphChecker: ") - : HGraphVisitor(graph), + : HGraphDelegateVisitor(graph), allocator_(allocator), dump_prefix_(dump_prefix) {} @@ -36,10 +36,10 @@ class GraphChecker : public HGraphVisitor { virtual void Run() { VisitInsertionOrder(); } // Check `block`. - virtual void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + void VisitBasicBlock(HBasicBlock* block) OVERRIDE; // Check `instruction`. - virtual void VisitInstruction(HInstruction* instruction) OVERRIDE; + void VisitInstruction(HInstruction* instruction) OVERRIDE; // Was the last visit of the graph valid? bool IsValid() const { @@ -82,7 +82,7 @@ class SSAChecker : public GraphChecker { : GraphChecker(allocator, graph, "art::SSAChecker: ") {} // Check the whole graph (in reverse post-order). - virtual void Run() { + void Run() OVERRIDE { // VisitReversePostOrder is used instead of VisitInsertionOrder, // as the latter might visit dead blocks removed by the dominator // computation. @@ -90,13 +90,15 @@ class SSAChecker : public GraphChecker { } // Perform SSA form checks on `block`.
- virtual void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + void VisitBasicBlock(HBasicBlock* block) OVERRIDE; // Loop-related checks from block `loop_header`. void CheckLoop(HBasicBlock* loop_header); // Perform SSA form checks on instructions. - virtual void VisitInstruction(HInstruction* instruction) OVERRIDE; - virtual void VisitPhi(HPhi* phi) OVERRIDE; + void VisitInstruction(HInstruction* instruction) OVERRIDE; + void VisitPhi(HPhi* phi) OVERRIDE; + void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE; + void VisitCondition(HCondition* op) OVERRIDE; private: DISALLOW_COPY_AND_ASSIGN(SSAChecker); diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index 4d8bec2422..60d996ba88 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -30,7 +30,6 @@ class HGraph; // TODO: Create an analysis/optimization abstraction. static const char* kLivenessPassName = "liveness"; static const char* kRegisterAllocatorPassName = "register"; -static const char* kGVNPassName = "gvn"; /** * If enabled, emits compilation information suitable for the c1visualizer tool diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 25168b5b0c..6e5f1bd203 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -91,29 +91,38 @@ SideEffects GlobalValueNumberer::GetBlockEffects(HBasicBlock* block) const { return block_effects_.Get(block->GetBlockId()); } -static bool IsLoopExit(HBasicBlock* block, HBasicBlock* successor) { - HLoopInformation* block_info = block->GetLoopInformation(); - HLoopInformation* other_info = successor->GetLoopInformation(); - return block_info != other_info && (other_info == nullptr || block_info->IsIn(*other_info)); -} - void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { - if (kIsDebugBuild) { - // Check that all non back-edge processors have been visited. 
- for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) { - HBasicBlock* predecessor = block->GetPredecessors().Get(i); - DCHECK(visited_.Get(predecessor->GetBlockId()) - || (block->GetLoopInformation() != nullptr - && (block->GetLoopInformation()->GetBackEdges().Get(0) == predecessor))); + ValueSet* set = nullptr; + const GrowableArray<HBasicBlock*>& predecessors = block->GetPredecessors(); + if (predecessors.Size() == 0 || predecessors.Get(0)->IsEntryBlock()) { + // The entry block should only accumulate constant instructions, and + // the builder puts constants only in the entry block. + // Therefore, there is no need to propagate the value set to the next block. + set = new (allocator_) ValueSet(allocator_); + } else { + HBasicBlock* dominator = block->GetDominator(); + set = sets_.Get(dominator->GetBlockId()); + if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) { + // We have to copy if the dominator has other successors, or `block` is not a successor + // of the dominator.
+ set = set->Copy(); + } + if (!set->IsEmpty()) { + if (block->IsLoopHeader()) { + DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader()); + set->Kill(GetLoopEffects(block)); + } else if (predecessors.Size() > 1) { + for (size_t i = 0, e = predecessors.Size(); i < e; ++i) { + set->IntersectionWith(sets_.Get(predecessors.Get(i)->GetBlockId())); + if (set->IsEmpty()) { + break; + } + } + } } - visited_.Put(block->GetBlockId(), true); } - ValueSet* set = sets_.Get(block->GetBlockId()); - - if (block->IsLoopHeader()) { - set->Kill(GetLoopEffects(block)); - } + sets_.Put(block->GetBlockId(), set); HInstruction* current = block->GetFirstInstruction(); while (current != nullptr) { @@ -131,57 +140,6 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { } current = next; } - - if (block == graph_->GetEntryBlock()) { - // The entry block should only accumulate constant instructions, and - // the builder puts constants only in the entry block. - // Therefore, there is no need to propagate the value set to the next block. - DCHECK_EQ(block->GetDominatedBlocks().Size(), 1u); - HBasicBlock* dominated = block->GetDominatedBlocks().Get(0); - sets_.Put(dominated->GetBlockId(), new (allocator_) ValueSet(allocator_)); - return; - } - - // Copy the value set to dominated blocks. We can re-use - // the current set for the last dominated block because we are done visiting - // this block. - for (size_t i = 0, e = block->GetDominatedBlocks().Size(); i < e; ++i) { - HBasicBlock* dominated = block->GetDominatedBlocks().Get(i); - sets_.Put(dominated->GetBlockId(), i == e - 1 ? set : set->Copy()); - } - - // Kill instructions in the value set of each successor. If the successor - // is a loop exit, then we use the side effects of the loop. If not, we use - // the side effects of this block. 
- for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) { - HBasicBlock* successor = block->GetSuccessors().Get(i); - if (successor->IsLoopHeader() - && successor->GetLoopInformation()->GetBackEdges().Get(0) == block) { - // In case of a back edge, we already have visited the loop header. - // We should not update its value set, because the last dominated block - // of the loop header uses the same value set. - DCHECK(visited_.Get(successor->GetBlockId())); - continue; - } - DCHECK(!visited_.Get(successor->GetBlockId())); - ValueSet* successor_set = sets_.Get(successor->GetBlockId()); - // The dominator sets the set, and we are guaranteed to have visited it already. - DCHECK(successor_set != nullptr); - - // If this block dominates this successor there is nothing to do. - // Also if the set is empty, there is nothing to kill. - if (successor->GetDominator() != block && !successor_set->IsEmpty()) { - if (block->IsInLoop() && IsLoopExit(block, successor)) { - // All instructions killed in the loop must be killed for a loop exit. - SideEffects effects = GetLoopEffects(block->GetLoopInformation()->GetHeader()); - sets_.Get(successor->GetBlockId())->Kill(effects); - } else { - // Following block (that might be in the same loop). - // Just kill instructions based on this block's side effects. - sets_.Get(successor->GetBlockId())->Kill(GetBlockEffects(block)); - } - } - } } } // namespace art diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h index 8d2c77475c..81f2c3fa87 100644 --- a/compiler/optimizing/gvn.h +++ b/compiler/optimizing/gvn.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_GVN_H_ #include "nodes.h" +#include "optimization.h" namespace art { @@ -95,6 +96,26 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { return nullptr; } + // Returns whether `instruction` is in the set. 
+ HInstruction* IdentityLookup(HInstruction* instruction) const { + size_t hash_code = instruction->ComputeHashCode(); + size_t index = hash_code % kDefaultNumberOfEntries; + HInstruction* existing = table_[index]; + if (existing != nullptr && existing == instruction) { + return existing; + } + + for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) { + if (node->GetHashCode() == hash_code) { + existing = node->GetInstruction(); + if (existing == instruction) { + return existing; + } + } + } + return nullptr; + } + // Removes all instructions in the set that are affected by the given side effects. void Kill(SideEffects side_effects) { for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { @@ -105,9 +126,9 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { } } - ValueSetNode* current = collisions_; - ValueSetNode* previous = nullptr; - while (current != nullptr) { + for (ValueSetNode* current = collisions_, *previous = nullptr; + current != nullptr; + current = current->GetNext()) { HInstruction* instruction = current->GetInstruction(); if (instruction->GetSideEffects().DependsOn(side_effects)) { if (previous == nullptr) { @@ -119,7 +140,6 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { } else { previous = current; } - current = current->GetNext(); } } @@ -142,6 +162,44 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { return copy; } + void Clear() { + number_of_entries_ = 0; + collisions_ = nullptr; + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + table_[i] = nullptr; + } + } + + // Update this `ValueSet` by intersecting with instructions in `other`. 
+ void IntersectionWith(ValueSet* other) { + if (IsEmpty()) { + return; + } else if (other->IsEmpty()) { + Clear(); + } else { + for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) { + if (table_[i] != nullptr && other->IdentityLookup(table_[i]) == nullptr) { + --number_of_entries_; + table_[i] = nullptr; + } + } + for (ValueSetNode* current = collisions_, *previous = nullptr; + current != nullptr; + current = current->GetNext()) { + if (other->IdentityLookup(current->GetInstruction()) == nullptr) { + if (previous == nullptr) { + collisions_ = current->GetNext(); + } else { + previous->SetNext(current->GetNext()); + } + --number_of_entries_; + } else { + previous = current; + } + } + } + } + bool IsEmpty() const { return number_of_entries_ == 0; } size_t GetNumberOfEntries() const { return number_of_entries_; } @@ -168,17 +226,15 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { class GlobalValueNumberer : public ValueObject { public: GlobalValueNumberer(ArenaAllocator* allocator, HGraph* graph) - : allocator_(allocator), - graph_(graph), + : graph_(graph), + allocator_(allocator), block_effects_(allocator, graph->GetBlocks().Size()), loop_effects_(allocator, graph->GetBlocks().Size()), - sets_(allocator, graph->GetBlocks().Size()), - visited_(allocator, graph->GetBlocks().Size()) { + sets_(allocator, graph->GetBlocks().Size()) { size_t number_of_blocks = graph->GetBlocks().Size(); block_effects_.SetSize(number_of_blocks); loop_effects_.SetSize(number_of_blocks); sets_.SetSize(number_of_blocks); - visited_.SetSize(number_of_blocks); for (size_t i = 0; i < number_of_blocks; ++i) { block_effects_.Put(i, SideEffects::None()); @@ -201,8 +257,9 @@ class GlobalValueNumberer : public ValueObject { SideEffects GetLoopEffects(HBasicBlock* block) const; SideEffects GetBlockEffects(HBasicBlock* block) const; + HGraph* graph_; + ArenaAllocator* const allocator_; - HGraph* const graph_; // Side effects of individual blocks, that is the union of the side effects // of 
the instructions in the block. @@ -217,13 +274,23 @@ class GlobalValueNumberer : public ValueObject { // in the path from the dominator to the block. GrowableArray<ValueSet*> sets_; - // Mark visisted blocks. Only used for debugging. - GrowableArray<bool> visited_; - ART_FRIEND_TEST(GVNTest, LoopSideEffects); DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer); }; +class GVNOptimization : public HOptimization { + public: + explicit GVNOptimization(HGraph* graph) : HOptimization(graph, true, "GVN") {} + + void Run() OVERRIDE { + GlobalValueNumberer gvn(graph_->GetArena(), graph_); + gvn.Run(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(GVNOptimization); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_GVN_H_ diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 3e8361eca1..49ca44331d 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -18,11 +18,23 @@ namespace art { +class InstructionSimplifierVisitor : public HGraphVisitor { + public: + explicit InstructionSimplifierVisitor(HGraph* graph) : HGraphVisitor(graph) {} + + private: + void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; + void VisitEqual(HEqual* equal) OVERRIDE; + void VisitArraySet(HArraySet* equal) OVERRIDE; + void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; +}; + void InstructionSimplifier::Run() { - VisitInsertionOrder(); + InstructionSimplifierVisitor visitor(graph_); + visitor.VisitInsertionOrder(); } -void InstructionSimplifier::VisitSuspendCheck(HSuspendCheck* check) { +void InstructionSimplifierVisitor::VisitSuspendCheck(HSuspendCheck* check) { HBasicBlock* block = check->GetBlock(); // Currently always keep the suspend check at entry. 
if (block->IsEntryBlock()) return; @@ -38,7 +50,7 @@ void InstructionSimplifier::VisitSuspendCheck(HSuspendCheck* check) { block->RemoveInstruction(check); } -void InstructionSimplifier::VisitEqual(HEqual* equal) { +void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { HInstruction* input1 = equal->InputAt(0); HInstruction* input2 = equal->InputAt(1); if (input1->GetType() == Primitive::kPrimBoolean && input2->IsIntConstant()) { @@ -55,7 +67,7 @@ void InstructionSimplifier::VisitEqual(HEqual* equal) { } } -void InstructionSimplifier::VisitArraySet(HArraySet* instruction) { +void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { HInstruction* value = instruction->GetValue(); if (value->GetType() != Primitive::kPrimNot) return; @@ -67,4 +79,12 @@ void InstructionSimplifier::VisitArraySet(HArraySet* instruction) { } } +void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruction) { + if (instruction->GetResultType() == instruction->GetInputType()) { + // Remove the instruction if it's converting to the same type. + instruction->ReplaceWith(instruction->GetInput()); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 3844d57439..7068c7fc10 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -18,22 +18,19 @@ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ #include "nodes.h" +#include "optimization.h" namespace art { /** * Implements optimizations specific to each instruction. 
*/ -class InstructionSimplifier : public HGraphVisitor { +class InstructionSimplifier : public HOptimization { public: - explicit InstructionSimplifier(HGraph* graph) : HGraphVisitor(graph) {} + explicit InstructionSimplifier(HGraph* graph) + : HOptimization(graph, true, "instruction_simplifier") {} - void Run(); - - private: - virtual void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; - virtual void VisitEqual(HEqual* equal) OVERRIDE; - virtual void VisitArraySet(HArraySet* equal) OVERRIDE; + void Run() OVERRIDE; }; } // namespace art diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 6dd4207795..c49cf7e03f 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -50,10 +50,9 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); - ASSERT_EQ(liveness.GetLinearPostOrder().Size(), number_of_blocks); + ASSERT_EQ(liveness.GetLinearOrder().Size(), number_of_blocks); for (size_t i = 0; i < number_of_blocks; ++i) { - ASSERT_EQ(liveness.GetLinearPostOrder().Get(number_of_blocks - i - 1)->GetBlockId(), - expected_order[i]); + ASSERT_EQ(liveness.GetLinearOrder().Get(i)->GetBlockId(), expected_order[i]); } } @@ -194,4 +193,58 @@ TEST(LinearizeTest, CFG5) { TestCode(data, blocks, 12); } +TEST(LinearizeTest, CFG6) { + // Block0 + // | + // Block1 + // | + // Block2 ++++++++++++++ + // | + + // Block3 + + // / \ + + // Block8 Block4 + + // | / \ + + // Block5 <- Block9 Block6 + + // | + // Block7 + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::GOTO | 0x0100, + Instruction::IF_EQ, 0x0004, + Instruction::IF_EQ, 0x0003, + Instruction::RETURN_VOID, + Instruction::GOTO | 0xFA00); + + const int blocks[] = {0, 1, 2, 3, 4, 6, 9, 8, 5, 7}; + TestCode(data, blocks, arraysize(blocks)); +} + +TEST(LinearizeTest, CFG7) { + // Structure of this graph (+ are 
back edges) + // Block0 + // | + // Block1 + // | + // Block2 ++++++++ + // | + + // Block3 + + // / \ + + // Block4 Block8 + + // / \ | + + // Block5 Block9 - Block6 + + // | + // Block7 + // + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::GOTO | 0x0100, + Instruction::IF_EQ, 0x0005, + Instruction::IF_EQ, 0x0003, + Instruction::RETURN_VOID, + Instruction::GOTO | 0xFA00); + + const int blocks[] = {0, 1, 2, 3, 4, 9, 8, 6, 5, 7}; + TestCode(data, blocks, arraysize(blocks)); +} + } // namespace art diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 89c949563b..e3c6fec23b 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -386,7 +386,7 @@ TEST(LiveRangesTest, CFG4) { Instruction::ADD_INT, 1 << 8, Instruction::GOTO | 0x300, Instruction::ADD_INT, 1 << 8, - Instruction::RETURN | 1 << 8); + Instruction::RETURN); ArenaPool pool; ArenaAllocator allocator(&pool); @@ -410,7 +410,10 @@ TEST(LiveRangesTest, CFG4) { interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval(); range = interval->GetFirstRange(); ASSERT_EQ(4u, range->GetStart()); - ASSERT_EQ(28u, range->GetEnd()); + ASSERT_EQ(17u, range->GetEnd()); + range = range->GetNext(); + ASSERT_EQ(20u, range->GetStart()); + ASSERT_EQ(23u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); // Test for the first add. @@ -429,9 +432,8 @@ TEST(LiveRangesTest, CFG4) { ASSERT_EQ(26u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); - // Test for the phi, which is unused. 
HPhi* phi = liveness.GetInstructionFromSsaIndex(4)->AsPhi(); - ASSERT_EQ(phi->NumberOfUses(), 0u); + ASSERT_EQ(phi->NumberOfUses(), 1u); interval = phi->GetLiveInterval(); range = interval->GetFirstRange(); ASSERT_EQ(26u, range->GetStart()); diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index d1555d4e11..1ff26d914c 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -161,7 +161,14 @@ class Location : public ValueObject { } template <typename T> - T As() const { + T AsRegister() const { + DCHECK(IsRegister()); + return static_cast<T>(reg()); + } + + template <typename T> + T AsFpuRegister() const { + DCHECK(IsFpuRegister()); return static_cast<T>(reg()); } @@ -391,6 +398,10 @@ class RegisterSet : public ValueObject { return (register_set & (1 << reg)) != 0; } + size_t GetNumberOfRegisters() const { + return __builtin_popcount(core_registers_) + __builtin_popcount(floating_point_registers_); + } + private: uint32_t core_registers_; uint32_t floating_point_registers_; @@ -503,6 +514,10 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return &live_registers_; } + size_t GetNumberOfLiveRegisters() const { + return live_registers_.GetNumberOfRegisters(); + } + bool InputOverlapsWithOutputOrTemp(uint32_t input_index, bool is_environment) const { if (is_environment) return true; if ((input_index == 0) diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 7d52d7d221..28496e4ad2 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -42,6 +42,9 @@ static const int kDefaultNumberOfPredecessors = 2; static const int kDefaultNumberOfDominatedBlocks = 1; static const int kDefaultNumberOfBackEdges = 1; +static constexpr uint32_t kMaxIntShiftValue = 0x1f; +static constexpr uint64_t kMaxLongShiftValue = 0x3f; + enum IfCondition { kCondEQ, kCondNE, @@ -233,7 +236,7 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { return false; } - int 
NumberOfBackEdges() const { + size_t NumberOfBackEdges() const { return back_edges_.Size(); } @@ -521,9 +524,11 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { M(ParallelMove, Instruction) \ M(ParameterValue, Instruction) \ M(Phi, Instruction) \ - M(Rem, BinaryOperation) \ + M(Rem, BinaryOperation) \ M(Return, Instruction) \ M(ReturnVoid, Instruction) \ + M(Shl, BinaryOperation) \ + M(Shr, BinaryOperation) \ M(StaticFieldGet, Instruction) \ M(StaticFieldSet, Instruction) \ M(StoreLocal, Instruction) \ @@ -532,6 +537,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { M(Temporary, Instruction) \ M(Throw, Instruction) \ M(TypeConversion, Instruction) \ + M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ #define FOR_EACH_INSTRUCTION(M) \ @@ -771,7 +777,7 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { } // Returns whether two instructions are equal, that is: - // 1) They have the same type and contain the same data, + // 1) They have the same type and contain the same data (InstructionDataEquals). // 2) Their inputs are identical. bool Equals(HInstruction* other) const; @@ -1357,28 +1363,45 @@ class HGreaterThanOrEqual : public HCondition { // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1. class HCompare : public HBinaryOperation { public: - HCompare(Primitive::Type type, HInstruction* first, HInstruction* second) - : HBinaryOperation(Primitive::kPrimInt, first, second) { + // The bias applies for floating point operations and indicates how NaN + // comparisons are treated: + enum Bias { + kNoBias, // bias is not applicable (i.e. 
for long operation) + kGtBias, // return 1 for NaN comparisons + kLtBias, // return -1 for NaN comparisons + }; + + HCompare(Primitive::Type type, HInstruction* first, HInstruction* second, Bias bias) + : HBinaryOperation(Primitive::kPrimInt, first, second), bias_(bias) { DCHECK_EQ(type, first->GetType()); DCHECK_EQ(type, second->GetType()); } - virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { + int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x == y ? 0 : x > y ? 1 : -1; } - virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { + + int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x == y ? 0 : x > y ? 1 : -1; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return bias_ == other->AsCompare()->bias_; + } + + bool IsGtBias() { return bias_ == kGtBias; } + DECLARE_INSTRUCTION(Compare); private: + const Bias bias_; + DISALLOW_COPY_AND_ASSIGN(HCompare); }; @@ -1831,6 +1854,57 @@ class HDivZeroCheck : public HExpression<1> { DISALLOW_COPY_AND_ASSIGN(HDivZeroCheck); }; +class HShl : public HBinaryOperation { + public: + HShl(Primitive::Type result_type, HInstruction* left, HInstruction* right) + : HBinaryOperation(result_type, left, right) {} + + int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x << (y & kMaxIntShiftValue); } + int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x << (y & kMaxLongShiftValue); } + + DECLARE_INSTRUCTION(Shl); + + private: + DISALLOW_COPY_AND_ASSIGN(HShl); +}; + +class HShr : public HBinaryOperation { + public: + HShr(Primitive::Type result_type, HInstruction* left, HInstruction* right) + : HBinaryOperation(result_type, left, right) {} + + int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { return x >> (y & kMaxIntShiftValue); } + int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { return x >> (y & kMaxLongShiftValue); } + + DECLARE_INSTRUCTION(Shr); + + private: + DISALLOW_COPY_AND_ASSIGN(HShr); +}; + +class HUShr : public 
HBinaryOperation { + public: + HUShr(Primitive::Type result_type, HInstruction* left, HInstruction* right) + : HBinaryOperation(result_type, left, right) {} + + int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE { + uint32_t ux = static_cast<uint32_t>(x); + uint32_t uy = static_cast<uint32_t>(y) & kMaxIntShiftValue; + return static_cast<int32_t>(ux >> uy); + } + + int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE { + uint64_t ux = static_cast<uint64_t>(x); + uint64_t uy = static_cast<uint64_t>(y) & kMaxLongShiftValue; + return static_cast<int64_t>(ux >> uy); + } + + DECLARE_INSTRUCTION(UShr); + + private: + DISALLOW_COPY_AND_ASSIGN(HUShr); +}; + class HAnd : public HBinaryOperation { public: HAnd(Primitive::Type result_type, HInstruction* left, HInstruction* right) diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index ea98186d11..b99f6784f7 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -21,25 +21,21 @@ namespace art { -void HOptimization::Execute() { - Run(); - visualizer_.DumpGraph(pass_name_); - Check(); -} - void HOptimization::Check() { if (kIsDebugBuild) { if (is_in_ssa_form_) { SSAChecker checker(graph_->GetArena(), graph_); checker.Run(); if (!checker.IsValid()) { - LOG(FATAL) << Dumpable<SSAChecker>(checker); + LOG(FATAL) << "Error after " << GetPassName() << ": " + << Dumpable<SSAChecker>(checker); } } else { GraphChecker checker(graph_->GetArena(), graph_); checker.Run(); if (!checker.IsValid()) { - LOG(FATAL) << Dumpable<GraphChecker>(checker); + LOG(FATAL) << "Error after " << GetPassName() << ": " + << Dumpable<GraphChecker>(checker); } } } diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 59683e2075..e36ef198b6 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -17,7 +17,6 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ 
-#include "graph_visualizer.h" #include "nodes.h" namespace art { @@ -29,25 +28,19 @@ class HOptimization : public ValueObject { public: HOptimization(HGraph* graph, bool is_in_ssa_form, - const char* pass_name, - const HGraphVisualizer& visualizer) + const char* pass_name) : graph_(graph), is_in_ssa_form_(is_in_ssa_form), - pass_name_(pass_name), - visualizer_(visualizer) {} + pass_name_(pass_name) {} virtual ~HOptimization() {} - // Execute the optimization pass. - void Execute(); - // Return the name of the pass. const char* GetPassName() const { return pass_name_; } // Peform the analysis itself. virtual void Run() = 0; - private: // Verify the graph; abort if it is not valid. void Check(); @@ -59,9 +52,6 @@ class HOptimization : public ValueObject { const bool is_in_ssa_form_; // Optimization pass name. const char* pass_name_; - // A graph visualiser invoked after the execution of the optimization - // pass if enabled. - const HGraphVisualizer& visualizer_; DISALLOW_COPY_AND_ASSIGN(HOptimization); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 0de0907520..d8533eb8bf 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -35,6 +35,7 @@ #include "nodes.h" #include "prepare_for_register_allocation.h" #include "register_allocator.h" +#include "ssa_builder.h" #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" #include "utils/arena_allocator.h" @@ -167,7 +168,8 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, } uintptr_t OptimizingCompiler::GetEntryPointOf(mirror::ArtMethod* method) const { - return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode()); + return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( + InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); } bool OptimizingCompiler::WriteElf(art::File* file, OatWriter* oat_writer, @@ 
-189,6 +191,35 @@ static bool CanOptimize(const DexFile::CodeItem& code_item) { return code_item.tries_size_ == 0; } +static void RunOptimizations(HGraph* graph, const HGraphVisualizer& visualizer) { + TransformToSsa ssa(graph); + HDeadCodeElimination opt1(graph); + HConstantFolding opt2(graph); + SsaRedundantPhiElimination opt3(graph); + SsaDeadPhiElimination opt4(graph); + InstructionSimplifier opt5(graph); + GVNOptimization opt6(graph); + InstructionSimplifier opt7(graph); + + HOptimization* optimizations[] = { + &ssa, + &opt1, + &opt2, + &opt3, + &opt4, + &opt5, + &opt6, + &opt7 + }; + + for (size_t i = 0; i < arraysize(optimizations); ++i) { + HOptimization* optimization = optimizations[i]; + optimization->Run(); + visualizer.DumpGraph(optimization->GetPassName()); + optimization->Check(); + } +} + CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, @@ -251,22 +282,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, && CanOptimize(*code_item) && RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) { optimized_compiled_methods_++; - graph->BuildDominatorTree(); - graph->TransformToSSA(); - visualizer.DumpGraph("ssa"); - graph->FindNaturalLoops(); - - HDeadCodeElimination(graph, visualizer).Execute(); - HConstantFolding(graph, visualizer).Execute(); - - SsaRedundantPhiElimination(graph).Run(); - SsaDeadPhiElimination(graph).Run(); - InstructionSimplifier(graph).Run(); - GlobalValueNumberer(graph->GetArena(), graph).Run(); - visualizer.DumpGraph(kGVNPassName); - InstructionSimplifier(graph).Run(); - PrepareForRegisterAllocation(graph).Run(); + RunOptimizations(graph, visualizer); + PrepareForRegisterAllocation(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); visualizer.DumpGraph(kLivenessPassName); @@ -309,7 +327,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, 
graph->FindNaturalLoops(); SsaRedundantPhiElimination(graph).Run(); SsaDeadPhiElimination(graph).Run(); - GlobalValueNumberer(graph->GetArena(), graph).Run(); + GVNOptimization(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); visualizer.DumpGraph(kLivenessPassName); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 4d6e66413d..a6c06359a0 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -70,7 +70,8 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, it.Advance()) { HInstruction* current = it.Current(); if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false; - if ((current->GetType() == Primitive::kPrimFloat || current->GetType() == Primitive::kPrimDouble) + if ((current->GetType() == Primitive::kPrimFloat + || current->GetType() == Primitive::kPrimDouble) && instruction_set != kX86_64) { return false; } @@ -95,6 +96,25 @@ void RegisterAllocator::AllocateRegisters() { ValidateInternal(true); processing_core_registers_ = false; ValidateInternal(true); + // Check that the linear order is still correct with regards to lifetime positions. + // Since only parallel moves have been inserted during the register allocation, + // these checks are mostly for making sure these moves have been added correctly. 
+ size_t current_liveness = 0; + for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + HInstruction* instruction = inst_it.Current(); + DCHECK_LE(current_liveness, instruction->GetLifetimePosition()); + current_liveness = instruction->GetLifetimePosition(); + } + for (HInstructionIterator inst_it(block->GetInstructions()); + !inst_it.Done(); + inst_it.Advance()) { + HInstruction* instruction = inst_it.Current(); + DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName(); + current_liveness = instruction->GetLifetimePosition(); + } + } } } @@ -189,11 +209,29 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { BlockRegister(temp, position, position + 1); } else { DCHECK(temp.IsUnallocated()); - DCHECK(temp.GetPolicy() == Location::kRequiresRegister); - LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); - temp_intervals_.Add(interval); - interval->AddRange(position, position + 1); - unhandled_core_intervals_.Add(interval); + switch (temp.GetPolicy()) { + case Location::kRequiresRegister: { + LiveInterval* interval = + LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); + temp_intervals_.Add(interval); + interval->AddRange(position, position + 1); + unhandled_core_intervals_.Add(interval); + break; + } + + case Location::kRequiresFpuRegister: { + LiveInterval* interval = + LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); + temp_intervals_.Add(interval); + interval->AddRange(position, position + 1); + unhandled_fp_intervals_.Add(interval); + break; + } + + default: + LOG(FATAL) << "Unexpected policy for temporary location " + << temp.GetPolicy(); + } } } @@ -216,8 +254,8 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { // maximum before updating locations. 
LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction); interval->AddRange(position, position + 1); - unhandled_core_intervals_.Add(interval); - unhandled_fp_intervals_.Add(interval); + AddSorted(&unhandled_core_intervals_, interval); + AddSorted(&unhandled_fp_intervals_, interval); } } @@ -250,6 +288,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { : unhandled_fp_intervals_; DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); + // Some instructions define their output in fixed register/stack slot. We need // to ensure we know these locations before doing register allocation. For a // given register, we create an interval that covers these locations. The register @@ -475,6 +514,7 @@ void RegisterAllocator::LinearScan() { LiveInterval* current = unhandled_->Pop(); DCHECK(!current->IsFixed() && !current->HasSpillSlot()); DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart()); + size_t position = current->GetStart(); // Remember the inactive_ size here since the ones moved to inactive_ from @@ -520,6 +560,7 @@ void RegisterAllocator::LinearScan() { // at safepoints. No need to allocate a register for it. maximum_number_of_live_registers_ = std::max(maximum_number_of_live_registers_, active_.Size()); + DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart()); continue; } @@ -764,6 +805,12 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter if (current->StartsAfter(interval)) { insert_at = i; break; + } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { + // Ensure the slow path interval is the last to be processed at its location: we want the + // interval to know all live registers at this location. 
+ DCHECK(i == 1 || array->Get(i - 2)->StartsAfter(current)); + insert_at = i; + break; } } array->InsertAt(insert_at, interval); @@ -876,6 +923,14 @@ void RegisterAllocator::AddInputMoveFor(HInstruction* user, move->AddMove(new (allocator_) MoveOperands(source, destination, nullptr)); } +static bool IsInstructionStart(size_t position) { + return (position & 1) == 0; +} + +static bool IsInstructionEnd(size_t position) { + return (position & 1) == 1; +} + void RegisterAllocator::InsertParallelMoveAt(size_t position, HInstruction* instruction, Location source, @@ -884,12 +939,29 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position, if (source.Equals(destination)) return; HInstruction* at = liveness_.GetInstructionFromPosition(position / 2); - if (at == nullptr) { - // Block boundary, don't do anything the connection of split siblings will handle it. - return; - } HParallelMove* move; - if ((position & 1) == 1) { + if (at == nullptr) { + if (IsInstructionStart(position)) { + // Block boundary, don't do anything the connection of split siblings will handle it. + return; + } else { + // Move must happen before the first instruction of the block. + at = liveness_.GetInstructionFromPosition((position + 1) / 2); + // Note that parallel moves may have already been inserted, so we explicitly + // ask for the first instruction of the block: `GetInstructionFromPosition` does + // not contain the moves. + at = at->GetBlock()->GetFirstInstruction(); + if (at->GetLifetimePosition() != position) { + DCHECK_GT(at->GetLifetimePosition(), position); + move = new (allocator_) HParallelMove(allocator_); + move->SetLifetimePosition(position); + at->GetBlock()->InsertInstructionBefore(move, at); + } else { + DCHECK(at->IsParallelMove()); + move = at->AsParallelMove(); + } + } + } else if (IsInstructionEnd(position)) { // Move must happen after the instruction. 
DCHECK(!at->IsControlFlow()); move = at->GetNext()->AsParallelMove(); @@ -941,10 +1013,11 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, HParallelMove* move; // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and output moves. + size_t position = last->GetLifetimePosition(); if (previous == nullptr || !previous->IsParallelMove() - || previous->AsParallelMove()->GetLifetimePosition() != block->GetLifetimeEnd()) { + || previous->AsParallelMove()->GetLifetimePosition() != position) { move = new (allocator_) HParallelMove(allocator_); - move->SetLifetimePosition(block->GetLifetimeEnd()); + move->SetLifetimePosition(position); block->InsertInstructionBefore(move, last); } else { move = previous->AsParallelMove(); @@ -1062,6 +1135,8 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { switch (source.GetKind()) { case Location::kRegister: { locations->AddLiveRegister(source); + DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_); + if (current->GetType() == Primitive::kPrimNot) { locations->SetRegisterBit(source.reg()); } @@ -1095,12 +1170,10 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, return; } + // Intervals end at the lifetime end of a block. The decrement by one + // ensures the `Cover` call will return true. size_t from_position = from->GetLifetimeEnd() - 1; - // When an instruction dies at entry of another, and the latter is the beginning - // of a block, the register allocator ensures the former has a register - // at block->GetLifetimeStart() + 1. Since this is at a block boundary, it must - // must be handled in this method. 
- size_t to_position = to->GetLifetimeStart() + 1; + size_t to_position = to->GetLifetimeStart(); LiveInterval* destination = nullptr; LiveInterval* source = nullptr; @@ -1238,9 +1311,27 @@ void RegisterAllocator::Resolve() { current = at; } LocationSummary* locations = at->GetLocations(); - DCHECK(temp->GetType() == Primitive::kPrimInt); - locations->SetTempAt( - temp_index++, Location::RegisterLocation(temp->GetRegister())); + switch (temp->GetType()) { + case Primitive::kPrimInt: + locations->SetTempAt( + temp_index++, Location::RegisterLocation(temp->GetRegister())); + break; + + case Primitive::kPrimDouble: + // TODO: Support the case of ARM, where a double value + // requires an FPU register pair (note that the ARM back end + // does not yet use this register allocator when a method uses + // floats or doubles). + DCHECK(codegen_->GetInstructionSet() != kArm + && codegen_->GetInstructionSet() != kThumb2); + locations->SetTempAt( + temp_index++, Location::FpuRegisterLocation(temp->GetRegister())); + break; + + default: + LOG(FATAL) << "Unexpected type for temporary location " + << temp->GetType(); + } } } diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index b2cc11996e..edfafcdd83 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -18,6 +18,7 @@ #include "nodes.h" #include "ssa_type_propagation.h" +#include "ssa_phi_elimination.h" namespace art { @@ -41,11 +42,20 @@ void SsaBuilder::BuildSsa() { } } - // 3) Propagate types of phis. + // 3) Remove dead phis. This will remove phis that are only used by environments: + // at the DEX level, the type of these phis does not need to be consistent, but + // our code generator will complain if the inputs of a phi do not have the same + // type (modulo the special case of `null`). + SsaDeadPhiElimination dead_phis(GetGraph()); + dead_phis.Run(); + + // 4) Propagate types of phis. 
At this point, phis are typed void in the general + // case, or float or double when we created a floating-point equivalent. So we + // need to propagate the types across phis to give them a correct type. SsaTypePropagation type_propagation(GetGraph()); type_propagation.Run(); - // 4) Clear locals. + // 5) Clear locals. // TODO: Move this to a dead code eliminator phase. for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 2207cd6bfa..5ab328fe23 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -18,9 +18,24 @@ #define ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_ #include "nodes.h" +#include "optimization.h" namespace art { +class TransformToSsa : public HOptimization { + public: + explicit TransformToSsa(HGraph* graph) : HOptimization(graph, true, "ssa transform") {} + + void Run() OVERRIDE { + graph_->BuildDominatorTree(); + graph_->TransformToSSA(); + graph_->FindNaturalLoops(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(TransformToSsa); +}; + static constexpr int kDefaultNumberOfLoops = 2; class SsaBuilder : public HGraphVisitor { diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 0085b27c58..660a5c5f60 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -28,11 +28,6 @@ void SsaLivenessAnalysis::Analyze() { ComputeLiveness(); } -static bool IsLoopExit(HLoopInformation* current, HLoopInformation* to) { - // `to` is either not part of a loop, or `current` is an inner loop of `to`. 
- return to == nullptr || (current != to && current->IsIn(*to)); -} - static bool IsLoop(HLoopInformation* info) { return info != nullptr; } @@ -48,46 +43,64 @@ static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) { && inner->IsIn(*outer); } -static void VisitBlockForLinearization(HBasicBlock* block, - GrowableArray<HBasicBlock*>* order, - ArenaBitVector* visited) { - if (visited->IsBitSet(block->GetBlockId())) { - return; - } - visited->SetBit(block->GetBlockId()); - size_t number_of_successors = block->GetSuccessors().Size(); - if (number_of_successors == 0) { - // Nothing to do. - } else if (number_of_successors == 1) { - VisitBlockForLinearization(block->GetSuccessors().Get(0), order, visited); - } else { - DCHECK_EQ(number_of_successors, 2u); - HBasicBlock* first_successor = block->GetSuccessors().Get(0); - HBasicBlock* second_successor = block->GetSuccessors().Get(1); - HLoopInformation* my_loop = block->GetLoopInformation(); - HLoopInformation* first_loop = first_successor->GetLoopInformation(); - HLoopInformation* second_loop = second_successor->GetLoopInformation(); - - if (!IsLoop(my_loop)) { - // Nothing to do. Current order is fine. - } else if (IsLoopExit(my_loop, second_loop) && InSameLoop(my_loop, first_loop)) { - // Visit the loop exit first in post order. - std::swap(first_successor, second_successor); - } else if (IsInnerLoop(my_loop, first_loop) && !IsInnerLoop(my_loop, second_loop)) { - // Visit the inner loop last in post order. 
- std::swap(first_successor, second_successor); +static void AddToListForLinearization(GrowableArray<HBasicBlock*>* worklist, HBasicBlock* block) { + size_t insert_at = worklist->Size(); + HLoopInformation* block_loop = block->GetLoopInformation(); + for (; insert_at > 0; --insert_at) { + HBasicBlock* current = worklist->Get(insert_at - 1); + HLoopInformation* current_loop = current->GetLoopInformation(); + if (InSameLoop(block_loop, current_loop) + || !IsLoop(current_loop) + || IsInnerLoop(current_loop, block_loop)) { + // The block can be processed immediately. + break; } - VisitBlockForLinearization(first_successor, order, visited); - VisitBlockForLinearization(second_successor, order, visited); } - order->Add(block); + worklist->InsertAt(insert_at, block); } void SsaLivenessAnalysis::LinearizeGraph() { - // For simplicity of the implementation, we create post linear order. The order for - // computing live ranges is the reverse of that order. - ArenaBitVector visited(graph_.GetArena(), graph_.GetBlocks().Size(), false); - VisitBlockForLinearization(graph_.GetEntryBlock(), &linear_post_order_, &visited); + // Create a reverse post ordering with the following properties: + // - Blocks in a loop are consecutive, + // - Back-edge is the last block before loop exits. + + // (1): Record the number of forward predecessors for each block. This is to + // ensure the resulting order is reverse post order. We could use the + // current reverse post order in the graph, but it would require making + // order queries to a GrowableArray, which is not the best data structure + // for it. 
+ GrowableArray<uint32_t> forward_predecessors(graph_.GetArena(), graph_.GetBlocks().Size()); + forward_predecessors.SetSize(graph_.GetBlocks().Size()); + for (size_t i = 0, e = graph_.GetBlocks().Size(); i < e; ++i) { + HBasicBlock* block = graph_.GetBlocks().Get(i); + size_t number_of_forward_predecessors = block->GetPredecessors().Size(); + if (block->IsLoopHeader()) { + // We rely on having simplified the CFG. + DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges()); + number_of_forward_predecessors--; + } + forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors); + } + + // (2): Following a worklist approach, first start with the entry block, and + // iterate over the successors. When all non-back edge predecessors of a + // successor block are visited, the successor block is added in the worklist + // following an order that satisfies the requirements to build our linear graph. + GrowableArray<HBasicBlock*> worklist(graph_.GetArena(), 1); + worklist.Add(graph_.GetEntryBlock()); + do { + HBasicBlock* current = worklist.Pop(); + linear_order_.Add(current); + for (size_t i = 0, e = current->GetSuccessors().Size(); i < e; ++i) { + HBasicBlock* successor = current->GetSuccessors().Get(i); + int block_id = successor->GetBlockId(); + size_t number_of_remaining_predecessors = forward_predecessors.Get(block_id); + if (number_of_remaining_predecessors == 1) { + AddToListForLinearization(&worklist, successor); + } + forward_predecessors.Put(block_id, number_of_remaining_predecessors - 1); + } + } while (!worklist.IsEmpty()); } void SsaLivenessAnalysis::NumberInstructions() { diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index ca08d5b3e6..23123891ef 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -582,7 +582,7 @@ class SsaLivenessAnalysis : public ValueObject { SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* 
codegen) : graph_(graph), codegen_(codegen), - linear_post_order_(graph.GetArena(), graph.GetBlocks().Size()), + linear_order_(graph.GetArena(), graph.GetBlocks().Size()), block_infos_(graph.GetArena(), graph.GetBlocks().Size()), instructions_from_ssa_index_(graph.GetArena(), 0), instructions_from_lifetime_position_(graph.GetArena(), 0), @@ -604,8 +604,8 @@ class SsaLivenessAnalysis : public ValueObject { return &block_infos_.Get(block.GetBlockId())->kill_; } - const GrowableArray<HBasicBlock*>& GetLinearPostOrder() const { - return linear_post_order_; + const GrowableArray<HBasicBlock*>& GetLinearOrder() const { + return linear_order_; } HInstruction* GetInstructionFromSsaIndex(size_t index) const { @@ -661,7 +661,7 @@ class SsaLivenessAnalysis : public ValueObject { const HGraph& graph_; CodeGenerator* const codegen_; - GrowableArray<HBasicBlock*> linear_post_order_; + GrowableArray<HBasicBlock*> linear_order_; GrowableArray<BlockInfo*> block_infos_; // Temporary array used when computing live_in, live_out, and kill sets. 
@@ -674,36 +674,41 @@ class SsaLivenessAnalysis : public ValueObject { DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis); }; -class HLinearOrderIterator : public ValueObject { +class HLinearPostOrderIterator : public ValueObject { public: - explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness) - : post_order_(liveness.GetLinearPostOrder()), index_(liveness.GetLinearPostOrder().Size()) {} + explicit HLinearPostOrderIterator(const SsaLivenessAnalysis& liveness) + : order_(liveness.GetLinearOrder()), index_(liveness.GetLinearOrder().Size()) {} bool Done() const { return index_ == 0; } - HBasicBlock* Current() const { return post_order_.Get(index_ -1); } - void Advance() { --index_; DCHECK_GE(index_, 0U); } + + HBasicBlock* Current() const { return order_.Get(index_ -1); } + + void Advance() { + --index_; + DCHECK_GE(index_, 0U); + } private: - const GrowableArray<HBasicBlock*>& post_order_; + const GrowableArray<HBasicBlock*>& order_; size_t index_; - DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator); + DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator); }; -class HLinearPostOrderIterator : public ValueObject { +class HLinearOrderIterator : public ValueObject { public: - explicit HLinearPostOrderIterator(const SsaLivenessAnalysis& liveness) - : post_order_(liveness.GetLinearPostOrder()), index_(0) {} + explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness) + : order_(liveness.GetLinearOrder()), index_(0) {} - bool Done() const { return index_ == post_order_.Size(); } - HBasicBlock* Current() const { return post_order_.Get(index_); } + bool Done() const { return index_ == order_.Size(); } + HBasicBlock* Current() const { return order_.Get(index_); } void Advance() { ++index_; } private: - const GrowableArray<HBasicBlock*>& post_order_; + const GrowableArray<HBasicBlock*>& order_; size_t index_; - DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator); + DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator); }; } // namespace art diff --git 
a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 56979e1c6a..58cea771b9 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -24,6 +24,8 @@ void SsaDeadPhiElimination::Run() { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); + // Set dead ahead of running through uses. The phi may have no use. + phi->SetDead(); for (HUseIterator<HInstruction> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { HUseListNode<HInstruction>* current = use_it.Current(); HInstruction* user = current->GetUser(); @@ -31,8 +33,6 @@ void SsaDeadPhiElimination::Run() { worklist_.Add(phi); phi->SetLive(); break; - } else { - phi->SetDead(); } } } @@ -65,8 +65,8 @@ void SsaDeadPhiElimination::Run() { use_it.Advance()) { HUseListNode<HInstruction>* user_node = use_it.Current(); HInstruction* user = user_node->GetUser(); - DCHECK(user->IsLoopHeaderPhi()); - DCHECK(user->AsPhi()->IsDead()); + DCHECK(user->IsLoopHeaderPhi()) << user->GetId(); + DCHECK(user->AsPhi()->IsDead()) << user->GetId(); // Just put itself as an input. The phi will be removed in this loop anyway. user->SetRawInputAt(user_node->GetIndex(), user); current->RemoveUser(user, user_node->GetIndex()); diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index 5274f09f3f..b7899712d6 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ #include "nodes.h" +#include "optimization.h" namespace art { @@ -25,15 +26,15 @@ namespace art { * Optimization phase that removes dead phis from the graph. Dead phis are unused * phis, or phis only used by other phis. 
*/ -class SsaDeadPhiElimination : public ValueObject { +class SsaDeadPhiElimination : public HOptimization { public: explicit SsaDeadPhiElimination(HGraph* graph) - : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {} + : HOptimization(graph, true, "dead_phi_elimination"), + worklist_(graph->GetArena(), kDefaultWorklistSize) {} - void Run(); + void Run() OVERRIDE; private: - HGraph* const graph_; GrowableArray<HPhi*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; @@ -47,15 +48,15 @@ class SsaDeadPhiElimination : public ValueObject { * registers might be updated with the same value, or not updated at all. We can just * replace the phi with the value when entering the loop. */ -class SsaRedundantPhiElimination : public ValueObject { +class SsaRedundantPhiElimination : public HOptimization { public: explicit SsaRedundantPhiElimination(HGraph* graph) - : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {} + : HOptimization(graph, true, "redundant_phi_elimination"), + worklist_(graph->GetArena(), kDefaultWorklistSize) {} - void Run(); + void Run() OVERRIDE; private: - HGraph* const graph_; GrowableArray<HPhi*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index fffe5c2b44..6174dd49a1 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -199,29 +199,31 @@ TEST(SsaTest, Loop1) { // Test that we create a phi for an initialized local at entry of a loop. 
const char* expected = "BasicBlock 0, succ: 1\n" - " 0: IntConstant 0 [6, 4, 2, 2]\n" - " 1: Goto\n" - "BasicBlock 1, pred: 0, succ: 5, 6\n" - " 2: Equal(0, 0) [3]\n" - " 3: If(2)\n" - "BasicBlock 2, pred: 3, 6, succ: 3\n" - " 4: Phi(6, 0) [6]\n" + " 0: IntConstant 0 [6, 3, 3]\n" + " 1: IntConstant 4 [6]\n" + " 2: Goto\n" + "BasicBlock 1, pred: 0, succ: 4, 2\n" + " 3: Equal(0, 0) [4]\n" + " 4: If(3)\n" + "BasicBlock 2, pred: 1, succ: 3\n" " 5: Goto\n" - "BasicBlock 3, pred: 5, 2, succ: 2\n" - " 6: Phi(0, 4) [4]\n" + "BasicBlock 3, pred: 2, 4, succ: 5\n" + " 6: Phi(1, 0) [9]\n" " 7: Goto\n" - "BasicBlock 4\n" - // Synthesized blocks to avoid critical edge. - "BasicBlock 5, pred: 1, succ: 3\n" + "BasicBlock 4, pred: 1, succ: 3\n" " 8: Goto\n" - "BasicBlock 6, pred: 1, succ: 2\n" - " 9: Goto\n"; + "BasicBlock 5, pred: 3, succ: 6\n" + " 9: Return(6)\n" + "BasicBlock 6, pred: 5\n" + " 10: Exit\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, - Instruction::IF_EQ, 3, - Instruction::GOTO | 0x100, - Instruction::GOTO | 0xFF00); + Instruction::IF_EQ, 4, + Instruction::CONST_4 | 4 << 12 | 0, + Instruction::GOTO | 0x200, + Instruction::GOTO | 0xFF00, + Instruction::RETURN | 0 << 8); TestCode(data, expected); } |