Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  | 159
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h   |  14
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc      |  50
3 files changed, 182 insertions, 41 deletions
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 9e08558c8e..1604a7c107 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -429,7 +429,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      isa_features_(isa_features) {
+      isa_features_(isa_features),
+      constant_area_start_(0) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -1952,7 +1953,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
     case Primitive::kPrimDouble:
     case Primitive::kPrimFloat: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2016,12 +2017,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
     }
 
     case Primitive::kPrimFloat: {
-      __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ addss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ addss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ addsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ addsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
@@ -2049,7 +2068,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) {
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2087,12 +2106,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
     }
 
     case Primitive::kPrimFloat: {
-      __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ subss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ subss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ subsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ subsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
@@ -2125,7 +2162,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) {
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2170,13 +2207,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
 
     case Primitive::kPrimFloat: {
       DCHECK(first.Equals(locations->Out()));
-      __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ mulss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ mulss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
       DCHECK(first.Equals(locations->Out()));
-      __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ mulsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ mulsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
@@ -2566,7 +2621,7 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
      locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2591,12 +2646,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
     }
 
     case Primitive::kPrimFloat: {
-      __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ divss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ divss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ divsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ divsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
@@ -4135,5 +4208,65 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
   LOG(FATAL) << "Unreachable";
 }
 
+void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+  // Generate the constant area if needed.
+  if (!__ IsConstantAreaEmpty()) {
+    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
+    // byte values. If used for vectors at a later time, this will need to be
+    // updated to 16 bytes with the appropriate offset.
+    __ Align(4, 0);
+    constant_area_start_ = __ CodeSize();
+    __ AddConstantArea();
+  }
+
+  // And finish up.
+  CodeGenerator::Finalize(allocator);
+}
+
+/**
+ * Class to handle late fixup of offsets into constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+ public:
+  RIPFixup(CodeGeneratorX86_64& codegen, int offset)
+      : codegen_(codegen), offset_into_constant_area_(offset) {}
+
+ private:
+  void Process(const MemoryRegion& region, int pos) OVERRIDE {
+    // Patch the correct offset for the instruction. We use the address of the
+    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+    int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
+    int relative_position = constant_offset - pos;
+
+    // Patch in the right value.
+    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+  }
+
+  CodeGeneratorX86_64& codegen_;
+
+  // Location in constant area that the fixup refers to.
+  int offset_into_constant_area_;
+};
+
+Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
+  return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
+  return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
+  return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
+  return Address::RIP(fixup);
+}
+
 }  // namespace x86_64
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index be2a79e55e..c819eecaf0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -246,6 +246,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+  void Finalize(CodeAllocator* allocator) OVERRIDE;
 
   InstructionSet GetInstructionSet() const OVERRIDE {
     return InstructionSet::kX86_64;
@@ -277,6 +278,15 @@ class CodeGeneratorX86_64 : public CodeGenerator {
     return isa_features_;
   }
 
+  int ConstantAreaStart() const {
+    return constant_area_start_;
+  }
+
+  Address LiteralDoubleAddress(double v);
+  Address LiteralFloatAddress(float v);
+  Address LiteralInt32Address(int32_t v);
+  Address LiteralInt64Address(int64_t v);
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
@@ -287,6 +297,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   X86_64Assembler assembler_;
   const X86_64InstructionSetFeatures& isa_features_;
 
+  // Offset to start of the constant area in the assembled code.
+  // Used for fixups to the constant area.
+  int constant_area_start_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 5122a00d92..c0c4ff3388 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -298,25 +298,23 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke)
   // TODO: Allow x86 to work with memory. This requires assembler support, see below.
   // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
   locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
-  locations->AddTemp(Location::RequiresFpuRegister());  // FP version of above.
+  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
 }
 
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations, bool is64bit,
+                      X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
   Location output = locations->Out();
-  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
 
   if (output.IsFpuRegister()) {
     // In-register
-    XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
 
+    // TODO: Can mask directly with constant area if we align on 16 bytes.
     if (is64bit) {
-      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
-      __ movd(xmm_temp, cpu_temp);
+      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
       __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
     } else {
-      __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
-      __ movd(xmm_temp, cpu_temp);
+      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
       __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
     }
   } else {
@@ -341,7 +339,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -349,7 +347,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
 }
 
 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -400,7 +398,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
 }
 
 static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
-                        X86_64Assembler* assembler) {
+                        X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
   Location op1_loc = locations->InAt(0);
   Location op2_loc = locations->InAt(1);
   Location out_loc = locations->Out();
@@ -427,7 +425,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
   //
   // This removes one jmp, but needs to copy one input (op1) to out.
   //
-  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
 
   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
 
@@ -461,14 +459,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
 
   // NaN handling.
   __ Bind(&nan);
-  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-  // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
   if (is_double) {
-    __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
   } else {
-    __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
   }
-  __ movd(out, cpu_temp, is_double);
   __ jmp(&done);
 
   // out := op2;
@@ -483,7 +478,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
   __ Bind(&done);
 }
 
-static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
@@ -492,39 +487,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvo
   // The following is sub-optimal, but all we can do for now. It would be fine to also accept
   // the second input to be the output (we can simply swap inputs).
   locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
 }
 
 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
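Note: the following is an illustrative, standalone C++ sketch of the displacement arithmetic performed by RIPFixup::Process above; it is not part of this change, and the buffer type and function names are hypothetical stand-ins for MemoryRegion and the assembler. The only facts carried over from the patch are that the rel32 field occupies the last 4 bytes of the instruction ending at 'pos', and that x86-64 RIP-relative addressing is measured from the address of the next instruction.

// Illustrative sketch only: patch a rel32 displacement the way RIPFixup::Process
// does, using a plain byte buffer in place of MemoryRegion.
#include <cstdint>
#include <cstring>
#include <vector>

struct FakeCodeBuffer {                // hypothetical stand-in for the assembled method
  std::vector<uint8_t> bytes;          // code, followed by the constant area
  int constant_area_start = 0;         // set once the constant area is emitted (cf. Finalize)
};

// 'pos' is the offset just past the referencing instruction; the literal lives at
// constant_area_start + offset_into_constant_area, so the displacement is their difference.
void PatchRipFixup(FakeCodeBuffer* buf, int pos, int offset_into_constant_area) {
  int constant_offset = buf->constant_area_start + offset_into_constant_area;
  int32_t relative_position = constant_offset - pos;
  std::memcpy(&buf->bytes[pos - 4], &relative_position, sizeof(relative_position));
}

int main() {
  FakeCodeBuffer buf;
  buf.bytes.resize(64, 0x90);          // pretend 64 bytes of code
  buf.constant_area_start = 48;        // constant area appended at offset 48
  PatchRipFixup(&buf, /*pos=*/16, /*offset_into_constant_area=*/8);
  // The instruction ending at offset 16 now encodes disp32 = 40, i.e. it reads offset 56 = 48 + 8.
  return 0;
}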
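Likewise, a small illustrative check (not part of this change) of the bit patterns the intrinsics now load from the constant area: 0x7FFFFFFFFFFFFFFF is the everything-but-the-sign-bit mask that andpd applies for Math.abs, and 0x7FF8000000000000 is the canonical quiet NaN written on the NaN path of GenMinMaxFP (0x7FFFFFFF and 0x7FC00000 are the float counterparts).

// Illustrative check of the bit patterns used by the intrinsics above.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
  // Math.abs(double): clearing bit 63 (the sign bit) is what andpd with the mask does.
  double d = -3.5;
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));
  bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);
  std::memcpy(&d, &bits, sizeof(d));
  assert(d == 3.5);

  // Min/max NaN path: the loaded constant is the canonical quiet NaN.
  const uint64_t kQuietNaN = UINT64_C(0x7FF8000000000000);
  double qnan;
  std::memcpy(&qnan, &kQuietNaN, sizeof(qnan));
  assert(std::isnan(qnan));
  return 0;
}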