author    | Mark Mendell <mark.p.mendell@intel.com> | 2015-03-26 21:07:46 -0400
committer | Mark Mendell <mark.p.mendell@intel.com> | 2015-04-09 10:47:11 -0400
commit    | f55c3e0825cdfc4c5a27730031177d1a0198ec5a (patch)
tree      | 500a596838d0ec2bfb3f84ea3c4d87b35d5ca22f
parent    | 96159860fc6c4bf68a51a8a57941971f122685d6 (diff)
download  | android_art-f55c3e0825cdfc4c5a27730031177d1a0198ec5a.tar.gz  android_art-f55c3e0825cdfc4c5a27730031177d1a0198ec5a.tar.bz2  android_art-f55c3e0825cdfc4c5a27730031177d1a0198ec5a.zip
[optimizing] Add RIP support for x86_64
Support a constant area addressed using RIP on x86_64. Use it for FP
operations to avoid loading constants into a CPU register and moving
to an XMM register.
Change-Id: I58421759ef2a8475538876c20e696ec787015a72
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
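
Reviewer note: the core of this change is a small per-method literal pool (the "constant area") that the x86-64 backend appends to the generated code and addresses RIP-relatively, with identical literals shared between uses. The sketch below is a minimal standalone model of that deduplicating add logic, patterned on `ConstantArea::AddInt32`/`AddInt64` in the patch; `ToyConstantArea` and its `main` driver are illustrative names, not the ART class.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Minimal standalone model of the patch's ConstantArea: literals are stored
// as 32-bit slots, and an existing matching value is reused instead of being
// appended again. Returned values are byte offsets into the area.
class ToyConstantArea {
 public:
  int AddInt32(int32_t v) {
    for (size_t i = 0; i < buffer_.size(); ++i) {
      if (buffer_[i] == v) return static_cast<int>(i * kElemSize);
    }
    int result = static_cast<int>(buffer_.size() * kElemSize);
    buffer_.push_back(v);
    return result;
  }

  int AddInt64(int64_t v) {
    int32_t lo = static_cast<int32_t>(v);
    int32_t hi = static_cast<int32_t>(v >> 32);
    if (buffer_.size() > 1) {
      for (size_t i = 0; i + 1 < buffer_.size(); ++i) {
        if (buffer_[i] == lo && buffer_[i + 1] == hi) {
          return static_cast<int>(i * kElemSize);
        }
      }
    }
    int result = static_cast<int>(buffer_.size() * kElemSize);
    buffer_.push_back(lo);
    buffer_.push_back(hi);
    return result;
  }

  int AddDouble(double v) {
    int64_t bits;
    std::memcpy(&bits, &v, sizeof(bits));  // stand-in for bit_cast<int64_t, double>
    return AddInt64(bits);
  }

 private:
  static constexpr size_t kElemSize = sizeof(int32_t);
  std::vector<int32_t> buffer_;
};

int main() {
  ToyConstantArea area;
  std::printf("%d\n", area.AddDouble(1.0));  // 0
  std::printf("%d\n", area.AddDouble(2.0));  // 8
  std::printf("%d\n", area.AddDouble(1.0));  // 0 (reused, not duplicated)
  return 0;
}
```

As in the patch, a 64-bit search may match the concatenation of two unrelated 32-bit entries; that is harmless on little-endian x86-64 because the 8 bytes at the returned offset still hold exactly the requested bit pattern.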
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 159
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h  |  14
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc     |  50
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc    |  54
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h     | 112
5 files changed, 346 insertions, 43 deletions
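
Reviewer note: the key mechanism in `code_generator_x86_64.cc` below is `RIPFixup::Process()`. Each literal use is emitted with a placeholder 32-bit displacement; once `Finalize()` knows where the constant area starts, the displacement is rewritten relative to the end of the using instruction (which is what RIP points at during execution). A hedged standalone sketch of that arithmetic follows — the function name and raw byte vector are illustrative stand-ins for the ART `MemoryRegion` API:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

// Patch the 32-bit RIP-relative displacement of an instruction whose encoding
// ends at byte offset 'pos' (i.e. 'pos' is the offset of the next
// instruction). The target is the literal that lives at
// constant_area_start + offset_into_constant_area.
void PatchRipDisplacement(std::vector<uint8_t>* code, int pos,
                          int constant_area_start,
                          int offset_into_constant_area) {
  int32_t relative = constant_area_start + offset_into_constant_area - pos;
  // The displacement occupies the last 4 bytes of the instruction, i.e. the
  // bytes at [pos - 4, pos).
  assert(pos >= 4 && static_cast<size_t>(pos) <= code->size());
  std::memcpy(code->data() + pos - 4, &relative, sizeof(relative));
}

int main() {
  // 8-byte instruction at offset 0, constant area starting at offset 16,
  // literal at offset 8 within it: the displacement must be 16 + 8 - 8 = 16.
  std::vector<uint8_t> code(32, 0);
  PatchRipDisplacement(&code, /*pos=*/8, /*constant_area_start=*/16,
                       /*offset_into_constant_area=*/8);
  int32_t disp;
  std::memcpy(&disp, code.data() + 4, sizeof(disp));
  assert(disp == 16);
  return 0;
}
```

At run time the CPU adds the stored displacement to the address of the next instruction, so `constant_offset - pos` is exactly the distance from that point to the literal.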
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 9e08558c8e..1604a7c107 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -429,7 +429,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - isa_features_(isa_features) { + isa_features_(isa_features), + constant_area_start_(0) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -1952,7 +1953,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: case Primitive::kPrimFloat: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2016,12 +2017,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { } case Primitive::kPrimFloat: { - __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ addss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ addsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2049,7 +2068,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2087,12 +2106,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { } case Primitive::kPrimFloat: { - __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ subss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ subss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { 
+ DCHECK(second.IsDoubleStackSlot()); + __ subsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2125,7 +2162,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2170,13 +2207,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: { DCHECK(first.Equals(locations->Out())); - __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ mulss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { DCHECK(first.Equals(locations->Out())); - __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ mulsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2566,7 +2621,7 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2591,12 +2646,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { } case Primitive::kPrimFloat: { - __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ divss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ divsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -4135,5 +4208,65 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { + // 
Generate the constant area if needed. + if (!__ IsConstantAreaEmpty()) { + // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 + // byte values. If used for vectors at a later time, this will need to be + // updated to 16 bytes with the appropriate offset. + __ Align(4, 0); + constant_area_start_ = __ CodeSize(); + __ AddConstantArea(); + } + + // And finish up. + CodeGenerator::Finalize(allocator); +} + +/** + * Class to handle late fixup of offsets into constant area. + */ +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> { + public: + RIPFixup(CodeGeneratorX86_64& codegen, int offset) + : codegen_(codegen), offset_into_constant_area_(offset) {} + + private: + void Process(const MemoryRegion& region, int pos) OVERRIDE { + // Patch the correct offset for the instruction. We use the address of the + // 'next' instruction, which is 'pos' (patch the 4 bytes before). + int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_; + int relative_position = constant_offset - pos; + + // Patch in the right value. + region.StoreUnaligned<int32_t>(pos - 4, relative_position); + } + + CodeGeneratorX86_64& codegen_; + + // Location in constant area that the fixup refers to. + int offset_into_constant_area_; +}; + +Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralFloatAddress(float v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v)); + return Address::RIP(fixup); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index be2a79e55e..c819eecaf0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -246,6 +246,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void Finalize(CodeAllocator* allocator) OVERRIDE; InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kX86_64; @@ -277,6 +278,15 @@ class CodeGeneratorX86_64 : public CodeGenerator { return isa_features_; } + int ConstantAreaStart() const { + return constant_area_start_; + } + + Address LiteralDoubleAddress(double v); + Address LiteralFloatAddress(float v); + Address LiteralInt32Address(int32_t v); + Address LiteralInt64Address(int64_t v); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -287,6 +297,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { X86_64Assembler assembler_; const X86_64InstructionSetFeatures& isa_features_; + // Offset to start of the constant area in the assembled code. + // Used for fixups to the constant area. 
+ int constant_area_start_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 5122a00d92..c0c4ff3388 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -298,25 +298,23 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) // TODO: Allow x86 to work with memory. This requires assembler support, see below. // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly. locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. - locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above. + locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. } -static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { +static void MathAbsFP(LocationSummary* locations, bool is64bit, + X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) { Location output = locations->Out(); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (output.IsFpuRegister()) { // In-register - XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Can mask directly with constant area if we align on 16 bytes. if (is64bit) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); } } else { @@ -341,7 +339,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { @@ -349,7 +347,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_); } static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { @@ -400,7 +398,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { } static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, - X86_64Assembler* assembler) { + X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) { Location op1_loc = locations->InAt(0); Location op2_loc = locations->InAt(1); Location out_loc = locations->Out(); @@ -427,7 +425,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // // This removes one jmp, but needs to copy one input (op1) to out. // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? 
XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); @@ -461,14 +459,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // NaN handling. __ Bind(&nan); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access. if (is_double) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000))); + __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000))); + __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); } - __ movd(out, cpu_temp, is_double); __ jmp(&done); // out := op2; @@ -483,7 +478,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, __ Bind(&done); } -static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -492,39 +487,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo // The following is sub-optimal, but all we can do for now. It would be fine to also accept // the second input to be the output (we can simply swap inputs). locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. } void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_); } static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 30e821860c..a9e8c501ea 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1975,6 +1975,10 @@ void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) for (int i = 1; i < length; i++) { EmitUint8(operand.encoding_[i]); } + AssemblerFixup* fixup = 
operand.GetFixup(); + if (fixup != nullptr) { + EmitFixup(fixup); + } } @@ -2702,5 +2706,55 @@ void X86_64ExceptionSlowPath::Emit(Assembler *sasm) { #undef __ } +void X86_64Assembler::AddConstantArea() { + const std::vector<int32_t>& area = constant_area_.GetBuffer(); + for (size_t i = 0, u = area.size(); i < u; i++) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitInt32(area[i]); + } +} + +int ConstantArea::AddInt32(int32_t v) { + for (size_t i = 0, u = buffer_.size(); i < u; i++) { + if (v == buffer_[i]) { + return i * elem_size_; + } + } + + // Didn't match anything. + int result = buffer_.size() * elem_size_; + buffer_.push_back(v); + return result; +} + +int ConstantArea::AddInt64(int64_t v) { + int32_t v_low = v; + int32_t v_high = v >> 32; + if (buffer_.size() > 1) { + // Ensure we don't pass the end of the buffer. + for (size_t i = 0, u = buffer_.size() - 1; i < u; i++) { + if (v_low == buffer_[i] && v_high == buffer_[i+1]) { + return i * elem_size_; + } + } + } + + // Didn't match anything. + int result = buffer_.size() * elem_size_; + buffer_.push_back(v_low); + buffer_.push_back(v_high); + return result; +} + +int ConstantArea::AddDouble(double v) { + // Treat the value as a 64-bit integer value. + return AddInt64(bit_cast<int64_t, double>(v)); +} + +int ConstantArea::AddFloat(float v) { + // Treat the value as a 32-bit integer value. + return AddInt32(bit_cast<int32_t, float>(v)); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index d357a813e8..ef6205c870 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -97,9 +97,13 @@ class Operand : public ValueObject { && (reg.NeedsRex() == ((rex_ & 1) != 0)); // REX.000B bits match. } + AssemblerFixup* GetFixup() const { + return fixup_; + } + protected: // Operand can be sub classed (e.g: Address). - Operand() : rex_(0), length_(0) { } + Operand() : rex_(0), length_(0), fixup_(nullptr) { } void SetModRM(uint8_t mod_in, CpuRegister rm_in) { CHECK_EQ(mod_in & ~3, 0); @@ -136,12 +140,17 @@ class Operand : public ValueObject { length_ += disp_size; } + void SetFixup(AssemblerFixup* fixup) { + fixup_ = fixup; + } + private: uint8_t rex_; uint8_t length_; uint8_t encoding_[6]; + AssemblerFixup* fixup_; - explicit Operand(CpuRegister reg) : rex_(0), length_(0) { SetModRM(3, reg); } + explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); } // Get the operand encoding byte at the given index. uint8_t encoding_at(int index_in) const { @@ -232,6 +241,15 @@ class Address : public Operand { return result; } + // An RIP relative address that will be fixed up later. + static Address RIP(AssemblerFixup* fixup) { + Address result; + result.SetModRM(0, CpuRegister(RBP)); + result.SetDisp32(0); + result.SetFixup(fixup); + return result; + } + // If no_rip is true then the Absolute address isn't RIP relative. static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) { return Absolute(addr.Int32Value(), no_rip); @@ -242,6 +260,55 @@ class Address : public Operand { }; +/** + * Class to handle constant area values. + */ +class ConstantArea { + public: + ConstantArea() {} + + /** + * Add a double to the constant area. + * @param v literal to be added to the constant area. + * @returns the offset in the constant area where the literal resides. + */ + int AddDouble(double v); + + /** + * Add a float to the constant area. 
+ * @param v literal to be added to the constant area. + * @returns the offset in the constant area where the literal resides. + */ + int AddFloat(float v); + + /** + * Add an int32_t to the constant area. + * @param v literal to be added to the constant area. + * @returns the offset in the constant area where the literal resides. + */ + int AddInt32(int32_t v); + + /** + * Add an int64_t to the constant area. + * @param v literal to be added to the constant area. + * @returns the offset in the constant area where the literal resides. + */ + int AddInt64(int64_t v); + + int GetSize() const { + return buffer_.size() * elem_size_; + } + + const std::vector<int32_t>& GetBuffer() const { + return buffer_; + } + + private: + static constexpr size_t elem_size_ = sizeof(int32_t); + std::vector<int32_t> buffer_; +}; + + class X86_64Assembler FINAL : public Assembler { public: X86_64Assembler() {} @@ -669,6 +736,45 @@ class X86_64Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + /** + * Add a double to the constant area. + * @param v literal to be added to the constant area. + * @returns the offset in the constant area where the literal resides. + */ + int AddDouble(double v) { return constant_area_.AddDouble(v); } + + /** + * Add a float to the constant area. + * @param v literal to be added to the constant area. + * @returns the offset in the constant area where the literal resides. + */ + int AddFloat(float v) { return constant_area_.AddFloat(v); } + + /** + * Add an int32_t to the constant area. + * @param v literal to be added to the constant area. + * @returns the offset in the constant area where the literal resides. + */ + int AddInt32(int32_t v) { return constant_area_.AddInt32(v); } + + /** + * Add an int64_t to the constant area. + * @param v literal to be added to the constant area. + * @returns the offset in the constant area where the literal resides. + */ + int AddInt64(int64_t v) { return constant_area_.AddInt64(v); } + + /** + * Add the contents of the constant area to the assembler buffer. + */ + void AddConstantArea(); + + /** + * Is the constant area empty? + * @returns 'true' if there are no literals in the constant area. + */ + bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; } + private: void EmitUint8(uint8_t value); void EmitInt32(int32_t value); @@ -714,6 +820,8 @@ class X86_64Assembler FINAL : public Assembler { void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand); + ConstantArea constant_area_; + DISALLOW_COPY_AND_ASSIGN(X86_64Assembler); }; |
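
Closing note on `Address::RIP()` above: in 64-bit mode, a ModRM byte with mod = 0 and r/m = 101 (the slot that otherwise names RBP) selects `[RIP + disp32]`, which is why the helper calls `SetModRM(0, CpuRegister(RBP))` and reserves a 32-bit displacement for the fixup to overwrite. Below is a hedged sketch of the resulting byte pattern for `addsd xmm0, [rip + disp32]`, using the SSE2 encoding `F2 0F 58 /r`; the encoder function is illustrative and not part of the ART assembler.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Encode 'addsd xmm0, [rip + disp32]' with a placeholder displacement: the
// shape of instruction that RIPFixup later patches. ModRM 0x05 = mod 00,
// reg 000 (xmm0), r/m 101 (RIP-relative in 64-bit mode, no SIB byte).
std::vector<uint8_t> EncodeAddsdXmm0Rip(int32_t placeholder_disp) {
  std::vector<uint8_t> code = {0xF2, 0x0F, 0x58, 0x05};
  for (int shift = 0; shift < 32; shift += 8) {
    code.push_back(static_cast<uint8_t>((placeholder_disp >> shift) & 0xFF));
  }
  return code;
}

int main() {
  for (uint8_t b : EncodeAddsdXmm0Rip(0)) {
    std::printf("%02X ", b);  // F2 0F 58 05 00 00 00 00
  }
  std::printf("\n");
  return 0;
}
```

`EmitOperand()` in the patch registers the fixup immediately after emitting these operand bytes, so the "patch the 4 bytes before pos" rule in `RIPFixup::Process()` lines up with this layout.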