author     Mark Mendell <mark.p.mendell@intel.com>   2015-03-26 21:07:46 -0400
committer  Mark Mendell <mark.p.mendell@intel.com>   2015-04-09 10:47:11 -0400
commit     f55c3e0825cdfc4c5a27730031177d1a0198ec5a
tree       500a596838d0ec2bfb3f84ea3c4d87b35d5ca22f
parent     96159860fc6c4bf68a51a8a57941971f122685d6
[optimizing] Add RIP support for x86_64
Support a constant area addressed using RIP on x86_64. Use it for FP
operations to avoid loading constants into a CPU register and moving
them to an XMM register.

Change-Id: I58421759ef2a8475538876c20e696ec787015a72
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 159
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h  |  14
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc     |  50
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc    |  54
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h     | 112
5 files changed, 346 insertions(+), 43 deletions(-)
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 9e08558c8e..1604a7c107 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -429,7 +429,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
- isa_features_(isa_features) {
+ isa_features_(isa_features),
+ constant_area_start_(0) {
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -1952,7 +1953,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
case Primitive::kPrimDouble:
case Primitive::kPrimFloat: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2016,12 +2017,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
}
case Primitive::kPrimFloat: {
- __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2049,7 +2068,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2087,12 +2106,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
}
case Primitive::kPrimFloat: {
- __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2125,7 +2162,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2170,13 +2207,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
case Primitive::kPrimFloat: {
DCHECK(first.Equals(locations->Out()));
- __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
DCHECK(first.Equals(locations->Out()));
- __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2566,7 +2621,7 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2591,12 +2646,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
}
case Primitive::kPrimFloat: {
- __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -4135,5 +4208,65 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
LOG(FATAL) << "Unreachable";
}
+void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+ // Generate the constant area if needed.
+ if (!__ IsConstantAreaEmpty()) {
+ // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
+ // byte values. If used for vectors at a later time, this will need to be
+ // updated to 16 bytes with the appropriate offset.
+ __ Align(4, 0);
+ constant_area_start_ = __ CodeSize();
+ __ AddConstantArea();
+ }
+
+ // And finish up.
+ CodeGenerator::Finalize(allocator);
+}
+
+/**
+ * Class to handle late fixup of offsets into constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+ public:
+ RIPFixup(CodeGeneratorX86_64& codegen, int offset)
+ : codegen_(codegen), offset_into_constant_area_(offset) {}
+
+ private:
+ void Process(const MemoryRegion& region, int pos) OVERRIDE {
+ // Patch the correct offset for the instruction. We use the address of the
+ // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+ int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
+ int relative_position = constant_offset - pos;
+
+ // Patch in the right value.
+ region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+ }
+
+ CodeGeneratorX86_64& codegen_;
+
+ // Location in constant area that the fixup refers to.
+ int offset_into_constant_area_;
+};
+
+Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
+ return Address::RIP(fixup);
+}
+
} // namespace x86_64
} // namespace art
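
As an aside on the fixup math above: RIP-relative displacements are measured from the address of the next instruction, which is why Process() patches the four bytes just before pos with constant_offset - pos. A self-contained sketch of that patching step, using a plain byte vector and a made-up PatchRipDisp32 helper rather than ART's MemoryRegion API:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Patch the disp32 emitted as the last 4 bytes of an instruction that ends at
// 'pos' so that it reaches 'constant_offset'. Both values are offsets from the
// start of the code buffer; RIP points at the next instruction, i.e. at 'pos'.
void PatchRipDisp32(std::vector<uint8_t>* code, int pos, int constant_offset) {
  int32_t relative_position = constant_offset - pos;
  std::memcpy(code->data() + pos - 4, &relative_position, sizeof(relative_position));
}

int main() {
  std::vector<uint8_t> code(24, 0x90);   // 16 bytes of "code" + an 8-byte constant slot.
  int constant_area_start = 16;          // Recorded by Finalize() in the real code.
  int offset_into_constant_area = 0;     // Returned by AddDouble()/AddFloat().
  int pos = 8;                           // End of the instruction carrying the disp32.
  PatchRipDisp32(&code, pos, constant_area_start + offset_into_constant_area);

  int32_t disp;
  std::memcpy(&disp, code.data() + pos - 4, sizeof(disp));
  std::cout << "patched disp32 = " << disp << std::endl;  // 8
  return 0;
}

Running it prints "patched disp32 = 8", the distance from the end of the 8-byte instruction to the constant at offset 16.
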
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index be2a79e55e..c819eecaf0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -246,6 +246,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void Finalize(CodeAllocator* allocator) OVERRIDE;
InstructionSet GetInstructionSet() const OVERRIDE {
return InstructionSet::kX86_64;
@@ -277,6 +278,15 @@ class CodeGeneratorX86_64 : public CodeGenerator {
return isa_features_;
}
+ int ConstantAreaStart() const {
+ return constant_area_start_;
+ }
+
+ Address LiteralDoubleAddress(double v);
+ Address LiteralFloatAddress(float v);
+ Address LiteralInt32Address(int32_t v);
+ Address LiteralInt64Address(int64_t v);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -287,6 +297,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
X86_64Assembler assembler_;
const X86_64InstructionSetFeatures& isa_features_;
+ // Offset to start of the constant area in the assembled code.
+ // Used for fixups to the constant area.
+ int constant_area_start_;
+
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};
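
A rough standalone model (plain C++ containers, not ART's assembler buffer) of the layout that Finalize() and constant_area_start_ describe: the generated code, padding up to a 4-byte boundary, then the 32-bit words of the constant area appended at the end.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Round 'value' up to a multiple of 'alignment' (alignment must be a power of two).
size_t AlignUp(size_t value, size_t alignment) {
  return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
  std::vector<uint8_t> code(13, 0x90);              // 13 bytes of generated "code".
  size_t constant_area_start = AlignUp(code.size(), 4);
  code.resize(constant_area_start, 0x00);           // Alignment padding, as in Align(4, 0).

  std::vector<int32_t> constants = {42, -1};        // The constant-area payload.
  code.resize(constant_area_start + constants.size() * sizeof(int32_t));
  std::memcpy(code.data() + constant_area_start, constants.data(),
              constants.size() * sizeof(int32_t));

  std::cout << "constant area starts at offset " << constant_area_start << std::endl;  // 16
  return 0;
}
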
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 5122a00d92..c0c4ff3388 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -298,25 +298,23 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke)
// TODO: Allow x86 to work with memory. This requires assembler support, see below.
// locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
- locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above.
+ locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask.
}
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations, bool is64bit,
+ X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
Location output = locations->Out();
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
if (output.IsFpuRegister()) {
// In-register
- XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ // TODO: Can mask directly with constant area if we align on 16 bytes.
if (is64bit) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
__ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
__ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
}
} else {
@@ -341,7 +339,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -349,7 +347,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}
static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -400,7 +398,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
}
static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
- X86_64Assembler* assembler) {
+ X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
Location op1_loc = locations->InAt(0);
Location op2_loc = locations->InAt(1);
Location out_loc = locations->Out();
@@ -427,7 +425,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
//
// This removes one jmp, but needs to copy one input (op1) to out.
//
- // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+ // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
@@ -461,14 +459,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
// NaN handling.
__ Bind(&nan);
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
if (is_double) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+ __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+ __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
}
- __ movd(out, cpu_temp, is_double);
__ jmp(&done);
// out := op2;
@@ -483,7 +478,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
__ Bind(&done);
}
-static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
@@ -492,39 +487,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo
// The following is sub-optimal, but all we can do for now. It would be fine to also accept
// the second input to be the output (we can simply swap inputs).
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}
static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
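
For background on the masks the intrinsics above now fetch through LiteralInt64Address/LiteralInt32Address: Math.abs on a float or double is just the IEEE-754 sign bit cleared, which is what a single andpd/andps against 0x7FFFFFFFFFFFFFFF or 0x7FFFFFFF does, and the min/max NaN paths load the canonical quiet-NaN bit patterns the same way. A host-side sketch of the abs bit manipulation (ordinary C++, not ART code):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Clearing the sign bit of an IEEE-754 double yields its absolute value; the
// intrinsic does the same thing in one andpd against a constant-area mask.
double AbsViaMask(double v) {
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof(bits));
  bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);  // The mask loaded via LiteralInt64Address().
  std::memcpy(&v, &bits, sizeof(v));
  return v;
}

int main() {
  assert(AbsViaMask(-3.5) == 3.5);
  assert(AbsViaMask(2.25) == 2.25);
  assert(!std::signbit(AbsViaMask(-0.0)));  // -0.0 becomes +0.0.
  return 0;
}
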
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 30e821860c..a9e8c501ea 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1975,6 +1975,10 @@ void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand)
for (int i = 1; i < length; i++) {
EmitUint8(operand.encoding_[i]);
}
+ AssemblerFixup* fixup = operand.GetFixup();
+ if (fixup != nullptr) {
+ EmitFixup(fixup);
+ }
}
@@ -2702,5 +2706,55 @@ void X86_64ExceptionSlowPath::Emit(Assembler *sasm) {
#undef __
}
+void X86_64Assembler::AddConstantArea() {
+ const std::vector<int32_t>& area = constant_area_.GetBuffer();
+ for (size_t i = 0, u = area.size(); i < u; i++) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitInt32(area[i]);
+ }
+}
+
+int ConstantArea::AddInt32(int32_t v) {
+ for (size_t i = 0, u = buffer_.size(); i < u; i++) {
+ if (v == buffer_[i]) {
+ return i * elem_size_;
+ }
+ }
+
+ // Didn't match anything.
+ int result = buffer_.size() * elem_size_;
+ buffer_.push_back(v);
+ return result;
+}
+
+int ConstantArea::AddInt64(int64_t v) {
+ int32_t v_low = v;
+ int32_t v_high = v >> 32;
+ if (buffer_.size() > 1) {
+ // Ensure we don't pass the end of the buffer.
+ for (size_t i = 0, u = buffer_.size() - 1; i < u; i++) {
+ if (v_low == buffer_[i] && v_high == buffer_[i+1]) {
+ return i * elem_size_;
+ }
+ }
+ }
+
+ // Didn't match anything.
+ int result = buffer_.size() * elem_size_;
+ buffer_.push_back(v_low);
+ buffer_.push_back(v_high);
+ return result;
+}
+
+int ConstantArea::AddDouble(double v) {
+ // Treat the value as a 64-bit integer value.
+ return AddInt64(bit_cast<int64_t, double>(v));
+}
+
+int ConstantArea::AddFloat(float v) {
+ // Treat the value as a 32-bit integer value.
+ return AddInt32(bit_cast<int32_t, float>(v));
+}
+
} // namespace x86_64
} // namespace art
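
The deduplication done by ConstantArea::AddInt32/AddInt64 above can be modelled with a stripped-down standalone class (SimpleConstantArea is an illustrative name, not ART's): offsets are returned in bytes, 64-bit values occupy two consecutive 32-bit slots, and re-adding an existing value returns the offset of the first match instead of growing the area.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

class SimpleConstantArea {
 public:
  // Returns the byte offset of 'v' in the area, reusing an existing slot if possible.
  int AddInt32(int32_t v) {
    for (size_t i = 0; i < buffer_.size(); ++i) {
      if (buffer_[i] == v) {
        return static_cast<int>(i * sizeof(int32_t));
      }
    }
    buffer_.push_back(v);
    return static_cast<int>((buffer_.size() - 1) * sizeof(int32_t));
  }

  // Returns the byte offset of 'v', stored as a low word followed by a high word.
  int AddInt64(int64_t v) {
    int32_t lo = static_cast<int32_t>(v);
    int32_t hi = static_cast<int32_t>(v >> 32);
    for (size_t i = 0; i + 1 < buffer_.size(); ++i) {
      if (buffer_[i] == lo && buffer_[i + 1] == hi) {
        return static_cast<int>(i * sizeof(int32_t));
      }
    }
    int result = static_cast<int>(buffer_.size() * sizeof(int32_t));
    buffer_.push_back(lo);
    buffer_.push_back(hi);
    return result;
  }

  // A double is stored by reinterpreting its bits, as bit_cast<int64_t, double> does.
  int AddDouble(double v) {
    int64_t bits;
    std::memcpy(&bits, &v, sizeof(bits));
    return AddInt64(bits);
  }

 private:
  std::vector<int32_t> buffer_;
};

int main() {
  SimpleConstantArea area;
  std::cout << area.AddDouble(1.5) << std::endl;  // 0
  std::cout << area.AddInt32(42) << std::endl;    // 8
  std::cout << area.AddDouble(1.5) << std::endl;  // 0 again: deduplicated.
  return 0;
}
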
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index d357a813e8..ef6205c870 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -97,9 +97,13 @@ class Operand : public ValueObject {
&& (reg.NeedsRex() == ((rex_ & 1) != 0)); // REX.000B bits match.
}
+ AssemblerFixup* GetFixup() const {
+ return fixup_;
+ }
+
protected:
// Operand can be sub classed (e.g: Address).
- Operand() : rex_(0), length_(0) { }
+ Operand() : rex_(0), length_(0), fixup_(nullptr) { }
void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
CHECK_EQ(mod_in & ~3, 0);
@@ -136,12 +140,17 @@ class Operand : public ValueObject {
length_ += disp_size;
}
+ void SetFixup(AssemblerFixup* fixup) {
+ fixup_ = fixup;
+ }
+
private:
uint8_t rex_;
uint8_t length_;
uint8_t encoding_[6];
+ AssemblerFixup* fixup_;
- explicit Operand(CpuRegister reg) : rex_(0), length_(0) { SetModRM(3, reg); }
+ explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
// Get the operand encoding byte at the given index.
uint8_t encoding_at(int index_in) const {
@@ -232,6 +241,15 @@ class Address : public Operand {
return result;
}
+ // An RIP relative address that will be fixed up later.
+ static Address RIP(AssemblerFixup* fixup) {
+ Address result;
+ result.SetModRM(0, CpuRegister(RBP));
+ result.SetDisp32(0);
+ result.SetFixup(fixup);
+ return result;
+ }
+
// If no_rip is true then the Absolute address isn't RIP relative.
static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) {
return Absolute(addr.Int32Value(), no_rip);
@@ -242,6 +260,55 @@ class Address : public Operand {
};
+/**
+ * Class to handle constant area values.
+ */
+class ConstantArea {
+ public:
+ ConstantArea() {}
+
+ /**
+ * Add a double to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddDouble(double v);
+
+ /**
+ * Add a float to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddFloat(float v);
+
+ /**
+ * Add an int32_t to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddInt32(int32_t v);
+
+ /**
+ * Add an int64_t to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddInt64(int64_t v);
+
+ int GetSize() const {
+ return buffer_.size() * elem_size_;
+ }
+
+ const std::vector<int32_t>& GetBuffer() const {
+ return buffer_;
+ }
+
+ private:
+ static constexpr size_t elem_size_ = sizeof(int32_t);
+ std::vector<int32_t> buffer_;
+};
+
+
class X86_64Assembler FINAL : public Assembler {
public:
X86_64Assembler() {}
@@ -669,6 +736,45 @@ class X86_64Assembler FINAL : public Assembler {
// and branch to a ExceptionSlowPath if it is.
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+ /**
+ * Add a double to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddDouble(double v) { return constant_area_.AddDouble(v); }
+
+ /**
+ * Add a float to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddFloat(float v) { return constant_area_.AddFloat(v); }
+
+ /**
+ * Add an int32_t to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+
+ /**
+ * Add an int64_t to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+
+ /**
+ * Add the contents of the constant area to the assembler buffer.
+ */
+ void AddConstantArea();
+
+ /**
+ * Is the constant area empty?
+ * @returns 'true' if there are no literals in the constant area.
+ */
+ bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
+
private:
void EmitUint8(uint8_t value);
void EmitInt32(int32_t value);
@@ -714,6 +820,8 @@ class X86_64Assembler FINAL : public Assembler {
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
+ ConstantArea constant_area_;
+
DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
};
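
One encoding detail worth spelling out: Address::RIP() calls SetModRM(0, CpuRegister(RBP)) because, in 64-bit mode, ModRM mod=00 with rm=101 does not mean [RBP] but RIP-relative addressing with a trailing disp32; that disp32 is the four-byte slot the fixup attached in EmitOperand() later patches. A hand-rolled sketch (not the ART assembler) of how, for example, addsd xmm0, [rip + disp32] is laid out:

#include <cstdint>
#include <cstdio>
#include <vector>

// Encode `addsd xmm0, [rip + disp32]`: F2 0F 58 is the addsd opcode, the ModRM
// byte 0x05 selects mod=00, reg=xmm0 (000), rm=101 (RIP-relative), and the
// little-endian disp32 follows as the last four bytes of the instruction.
std::vector<uint8_t> EncodeAddsdRip(int32_t disp32) {
  std::vector<uint8_t> bytes = {0xF2, 0x0F, 0x58, 0x05};
  for (int i = 0; i < 4; ++i) {
    bytes.push_back(static_cast<uint8_t>((static_cast<uint32_t>(disp32) >> (8 * i)) & 0xFF));
  }
  return bytes;
}

int main() {
  for (uint8_t b : EncodeAddsdRip(0)) {  // Placeholder displacement, patched by the fixup.
    std::printf("%02X ", b);
  }
  std::printf("\n");  // F2 0F 58 05 00 00 00 00
  return 0;
}

Since the displacement counts from the end of the instruction, patching it with constant_offset - pos, as RIPFixup does, makes the load read straight from the constant area appended by Finalize().
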