Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/builder.cc                     |   25
-rw-r--r--  compiler/optimizing/code_generator.cc              |   82
-rw-r--r--  compiler/optimizing/code_generator.h               |   33
-rw-r--r--  compiler/optimizing/code_generator_arm.cc          |  800
-rw-r--r--  compiler/optimizing/code_generator_arm.h           |   41
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc        |  360
-rw-r--r--  compiler/optimizing/code_generator_arm64.h         |   18
-rw-r--r--  compiler/optimizing/code_generator_x86.cc          |  630
-rw-r--r--  compiler/optimizing/code_generator_x86.h           |   24
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc       |  611
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h        |   41
-rw-r--r--  compiler/optimizing/codegen_test.cc                |   24
-rw-r--r--  compiler/optimizing/constant_folding_test.cc       |   14
-rw-r--r--  compiler/optimizing/dead_code_elimination_test.cc  |    3
-rw-r--r--  compiler/optimizing/graph_checker.cc               |   72
-rw-r--r--  compiler/optimizing/graph_checker.h                |    5
-rw-r--r--  compiler/optimizing/graph_visualizer.cc            |   53
-rw-r--r--  compiler/optimizing/graph_visualizer.h             |   16
-rw-r--r--  compiler/optimizing/gvn_test.cc                    |   46
-rw-r--r--  compiler/optimizing/inliner.cc                     |   17
-rw-r--r--  compiler/optimizing/intrinsics.cc                  |  366
-rw-r--r--  compiler/optimizing/intrinsics.h                   |   86
-rw-r--r--  compiler/optimizing/intrinsics_list.h              |   87
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc           |  984
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.h            |   83
-rw-r--r--  compiler/optimizing/linearize_test.cc              |    3
-rw-r--r--  compiler/optimizing/live_ranges_test.cc            |   13
-rw-r--r--  compiler/optimizing/liveness_test.cc               |    3
-rw-r--r--  compiler/optimizing/locations.cc                   |    9
-rw-r--r--  compiler/optimizing/locations.h                    |   59
-rw-r--r--  compiler/optimizing/nodes.cc                       |   35
-rw-r--r--  compiler/optimizing/nodes.h                        |  223
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc         |  195
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h    |    2
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h         |    8
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc      |   11
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h       |    3
-rw-r--r--  compiler/optimizing/parallel_move_test.cc          |  104
-rw-r--r--  compiler/optimizing/register_allocator.cc          |  377
-rw-r--r--  compiler/optimizing/register_allocator.h           |   14
-rw-r--r--  compiler/optimizing/register_allocator_test.cc     |  144
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc       |   17
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h        |   85
43 files changed, 4532 insertions, 1294 deletions
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index f9054e0133..9c2facb75e 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -604,7 +604,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
HInvoke* invoke = nullptr;
if (optimized_invoke_type == kVirtual) {
invoke = new (arena_) HInvokeVirtual(
- arena_, number_of_arguments, return_type, dex_pc, table_index);
+ arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
} else if (optimized_invoke_type == kInterface) {
invoke = new (arena_) HInvokeInterface(
arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
@@ -670,10 +670,6 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField);
return false;
}
- if (resolved_field->IsVolatile()) {
- MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile);
- return false;
- }
Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
@@ -689,20 +685,20 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
null_check,
value,
field_type,
- resolved_field->GetOffset()));
+ resolved_field->GetOffset(),
+ resolved_field->IsVolatile()));
} else {
current_block_->AddInstruction(new (arena_) HInstanceFieldGet(
current_block_->GetLastInstruction(),
field_type,
- resolved_field->GetOffset()));
+ resolved_field->GetOffset(),
+ resolved_field->IsVolatile()));
UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
}
return true;
}
-
-
bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
uint32_t dex_pc,
bool is_put) {
@@ -723,11 +719,6 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
return false;
}
- if (resolved_field->IsVolatile()) {
- MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile);
- return false;
- }
-
Handle<mirror::Class> referrer_class(hs.NewHandle(compiler_driver_->ResolveCompilingMethodsClass(
soa, dex_cache, class_loader, outer_compilation_unit_)));
@@ -763,10 +754,12 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
DCHECK_EQ(value->GetType(), field_type);
current_block_->AddInstruction(
- new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset()));
+ new (arena_) HStaticFieldSet(cls, value, field_type, resolved_field->GetOffset(),
+ resolved_field->IsVolatile()));
} else {
current_block_->AddInstruction(
- new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset()));
+ new (arena_) HStaticFieldGet(cls, field_type, resolved_field->GetOffset(),
+ resolved_field->IsVolatile()));
UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
}
return true;
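
Note: the builder hunks above stop rejecting volatile fields and instead thread
resolved_field->IsVolatile() into the field-access nodes. A minimal sketch of
the FieldInfo holder this implies, as consumed by the ARM HandleFieldGet/Set
hunks later in this diff (the real definition lives in nodes.h; member names
here are illustrative):

    // Sketch only: field-access nodes now carry volatility alongside the
    // offset and type, instead of the compiler bailing out on volatile.
    class FieldInfo : public ValueObject {
     public:
      FieldInfo(MemberOffset field_offset, Primitive::Type field_type, bool is_volatile)
          : field_offset_(field_offset), field_type_(field_type), is_volatile_(is_volatile) {}

      MemberOffset GetFieldOffset() const { return field_offset_; }
      Primitive::Type GetFieldType() const { return field_type_; }
      bool IsVolatile() const { return is_volatile_; }

     private:
      const MemberOffset field_offset_;
      const Primitive::Type field_type_;
      const bool is_volatile_;
    };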
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 6f424ce11d..9665b0e2ae 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -54,6 +54,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
+ GetGraph()->GetTemporariesVRegSlots()
+ 1 /* filler */,
0, /* the baseline compiler does not have live registers at slow path */
+ 0, /* the baseline compiler does not have live FP registers at slow path */
GetGraph()->GetMaximumNumberOfOutVRegs()
+ 1 /* current method */);
GenerateFrameEntry();
@@ -136,14 +137,16 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l
}
void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
- size_t maximum_number_of_live_registers,
+ size_t maximum_number_of_live_core_registers,
+ size_t maximum_number_of_live_fp_registers,
size_t number_of_out_slots) {
first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
SetFrameSize(RoundUp(
number_of_spill_slots * kVRegSize
+ number_of_out_slots * kVRegSize
- + maximum_number_of_live_registers * GetWordSize()
+ + maximum_number_of_live_core_registers * GetWordSize()
+ + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
+ FrameEntrySpillSize(),
kStackAlignment));
}
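
Note: with core and FP live registers now counted separately, the frame size
computed above is

    frame_size = RoundUp(number_of_spill_slots * kVRegSize
                         + number_of_out_slots * kVRegSize
                         + max_live_core_registers * GetWordSize()
                         + max_live_fp_registers * GetFloatingPointSpillSlotSize()
                         + FrameEntrySpillSize(),
                         kStackAlignment)

The split matters on back ends where an FP spill slot is wider than a core
word; previously every live register was charged one GetWordSize() slot.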
@@ -325,24 +328,27 @@ bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) con
return current->GetBlockId() + 1 == next->GetBlockId();
}
-CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator,
- HGraph* graph,
- InstructionSet instruction_set) {
+CodeGenerator* CodeGenerator::Create(HGraph* graph,
+ InstructionSet instruction_set,
+ const InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options) {
switch (instruction_set) {
case kArm:
case kThumb2: {
- return new (allocator) arm::CodeGeneratorARM(graph);
+ return new arm::CodeGeneratorARM(graph,
+ *isa_features.AsArmInstructionSetFeatures(),
+ compiler_options);
}
case kArm64: {
- return new (allocator) arm64::CodeGeneratorARM64(graph);
+ return new arm64::CodeGeneratorARM64(graph, compiler_options);
}
case kMips:
return nullptr;
case kX86: {
- return new (allocator) x86::CodeGeneratorX86(graph);
+ return new x86::CodeGeneratorX86(graph, compiler_options);
}
case kX86_64: {
- return new (allocator) x86_64::CodeGeneratorX86_64(graph);
+ return new x86_64::CodeGeneratorX86_64(graph, compiler_options);
}
default:
return nullptr;
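
Note: Create() now heap-allocates the code generator rather than placing it in
the arena (the header hunk below drops the ArenaObject base and adds a virtual
destructor), so the caller owns the result. A minimal sketch of the new call
shape, assuming the caller wraps it for cleanup:

    // Sketch: ownership rests with the caller of Create().
    std::unique_ptr<CodeGenerator> codegen(
        CodeGenerator::Create(graph, instruction_set, isa_features, compiler_options));
    if (codegen == nullptr) {
      return nullptr;  // unsupported instruction set, e.g. kMips above
    }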
@@ -374,7 +380,7 @@ void CodeGenerator::BuildNativeGCMap(
}
}
-void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, SrcMap* src_map) const {
+void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const {
uint32_t pc2dex_data_size = 0u;
uint32_t pc2dex_entries = pc_infos_.Size();
uint32_t pc2dex_offset = 0u;
@@ -618,12 +624,62 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
break;
}
+ case Location::kFpuRegisterPair : {
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.low());
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.high());
+ ++i;
+ DCHECK_LT(i, environment_size);
+ break;
+ }
+
+ case Location::kRegisterPair : {
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.low());
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.high());
+ ++i;
+ DCHECK_LT(i, environment_size);
+ break;
+ }
+
default:
LOG(FATAL) << "Unexpected kind " << location.GetKind();
}
}
}
+bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
+ HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves();
+ return (first_next_not_move != nullptr) && first_next_not_move->CanDoImplicitNullCheck();
+}
+
+void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) {
+ // If we are from a static path don't record the pc as we can't throw NPE.
+ // NB: having the checks here makes the code much less verbose in the arch
+ // specific code generators.
+ if (instr->IsStaticFieldSet() || instr->IsStaticFieldGet()) {
+ return;
+ }
+
+ if (!compiler_options_.GetImplicitNullChecks()) {
+ return;
+ }
+
+ if (!instr->CanDoImplicitNullCheck()) {
+ return;
+ }
+
+ // Find the first previous instruction which is not a move.
+ HInstruction* first_prev_not_move = instr->GetPreviousDisregardingMoves();
+
+ // If the instruction is a null check it means that `instr` is the first user
+ // and needs to record the pc.
+ if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) {
+ HNullCheck* null_check = first_prev_not_move->AsNullCheck();
+ // TODO: The parallel moves modify the environment. Their changes need to be reverted
+ // otherwise the stack maps at the throw point will not be correct.
+ RecordPcInfo(null_check, null_check->GetDexPc());
+ }
+}
+
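
Note: the CanMoveNullCheckToUser/MaybeRecordImplicitNullCheck pair relies on a
fixed HIR shape: the HNullCheck is immediately followed (ignoring moves) by its
first user, whose own memory access faults when the reference is null.
Illustrative fragment (instruction ids hypothetical):

    // i1: NullCheck(obj)               // emits no code in implicit mode
    // i2: InstanceFieldGet(i1, field)  // the load itself faults if obj == null;
    //                                  // MaybeRecordImplicitNullCheck(i2) records
    //                                  // i1's dex pc at i2's first emitted pc, so
    //                                  // the fault handler can raise the NPE there.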
void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
RegisterSet* register_set = locations->GetLiveRegisters();
size_t stack_offset = first_register_slot_in_slow_path_;
@@ -684,11 +740,9 @@ void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend
}
void CodeGenerator::EmitParallelMoves(Location from1, Location to1, Location from2, Location to2) {
- MoveOperands move1(from1, to1, nullptr);
- MoveOperands move2(from2, to2, nullptr);
HParallelMove parallel_move(GetGraph()->GetArena());
- parallel_move.AddMove(&move1);
- parallel_move.AddMove(&move2);
+ parallel_move.AddMove(from1, to1, nullptr);
+ parallel_move.AddMove(from2, to2, nullptr);
GetMoveResolver()->EmitNativeCode(&parallel_move);
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 1d42c47d56..f66aed912a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -18,7 +18,9 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
+#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
@@ -44,7 +46,10 @@ class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
+class SrcMapElem;
+template <class Alloc>
class SrcMap;
+using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
class CodeAllocator {
public:
@@ -73,15 +78,17 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
-class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
+class CodeGenerator {
public:
// Compiles the graph to executable instructions. Returns whether the compilation
// succeeded.
void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
void CompileOptimized(CodeAllocator* allocator);
- static CodeGenerator* Create(ArenaAllocator* allocator,
- HGraph* graph,
- InstructionSet instruction_set);
+ static CodeGenerator* Create(HGraph* graph,
+ InstructionSet instruction_set,
+ const InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options);
+ virtual ~CodeGenerator() {}
HGraph* GetGraph() const { return graph_; }
@@ -104,9 +111,11 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
virtual HGraphVisitor* GetInstructionVisitor() = 0;
virtual Assembler* GetAssembler() = 0;
virtual size_t GetWordSize() const = 0;
+ virtual size_t GetFloatingPointSpillSlotSize() const = 0;
virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
void ComputeFrameSize(size_t number_of_spill_slots,
- size_t maximum_number_of_live_registers,
+ size_t maximum_number_of_live_core_registers,
+ size_t maximum_number_of_live_fp_registers,
size_t number_of_out_slots);
virtual size_t FrameEntrySpillSize() const = 0;
int32_t GetStackSlot(HLocal* local) const;
@@ -123,6 +132,9 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
virtual InstructionSet GetInstructionSet() const = 0;
+
+ const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
+
// Saves the register in the stack. Returns the size taken on stack.
virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
// Restores the register from the stack. Returns the size taken on stack.
@@ -137,8 +149,11 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
UNIMPLEMENTED(FATAL);
UNREACHABLE();
}
+ virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
+ bool CanMoveNullCheckToUser(HNullCheck* null_check);
+ void MaybeRecordImplicitNullCheck(HInstruction* instruction);
void AddSlowPath(SlowPathCode* slow_path) {
slow_paths_.Add(slow_path);
@@ -146,7 +161,7 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
void GenerateSlowPaths();
- void BuildMappingTable(std::vector<uint8_t>* vector, SrcMap* src_map) const;
+ void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
void BuildVMapTable(std::vector<uint8_t>* vector) const;
void BuildNativeGCMap(
std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
@@ -192,7 +207,8 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
CodeGenerator(HGraph* graph,
size_t number_of_core_registers,
size_t number_of_fpu_registers,
- size_t number_of_register_pairs)
+ size_t number_of_register_pairs,
+ const CompilerOptions& compiler_options)
: frame_size_(kUninitializedFrameSize),
core_spill_mask_(0),
first_register_slot_in_slow_path_(0),
@@ -203,11 +219,11 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
number_of_fpu_registers_(number_of_fpu_registers),
number_of_register_pairs_(number_of_register_pairs),
graph_(graph),
+ compiler_options_(compiler_options),
pc_infos_(graph->GetArena(), 32),
slow_paths_(graph->GetArena(), 8),
is_leaf_(true),
stack_map_stream_(graph->GetArena()) {}
- ~CodeGenerator() {}
// Register allocation logic.
void AllocateRegistersLocally(HInstruction* instruction) const;
@@ -242,6 +258,7 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
size_t GetStackOffsetOfSavedRegister(size_t index);
HGraph* const graph_;
+ const CompilerOptions& compiler_options_;
GrowableArray<PcInfo> pc_infos_;
GrowableArray<SlowPathCode*> slow_paths_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 002d9d4449..8c07b46173 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm.h"
+#include "arch/arm/instruction_set_features_arm.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "mirror/array-inl.h"
@@ -36,7 +37,10 @@ static DRegister FromLowSToD(SRegister reg) {
return static_cast<DRegister>(reg / 2);
}
-static constexpr bool kExplicitStackOverflowCheck = false;
+static bool ExpectedPairLayout(Location location) {
+ // We expect this for both core and fpu register pairs.
+ return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
+}
static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2; // LR, R6, R7
static constexpr int kCurrentMethodStackOffset = 0;
@@ -255,8 +259,8 @@ class LoadStringSlowPathARM : public SlowPathCodeARM {
codegen->SaveLiveRegisters(locations);
InvokeRuntimeCallingConvention calling_convention;
- arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0));
- __ LoadImmediate(calling_convention.GetRegisterAt(1), instruction_->GetStringIndex());
+ arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
+ __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
arm_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc());
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
@@ -372,13 +376,27 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
return kArmWordSize;
}
-CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
- : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs),
+size_t CodeGeneratorARM::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ __ StoreSToOffset(static_cast<SRegister>(reg_id), SP, stack_index);
+ return kArmWordSize;
+}
+
+size_t CodeGeneratorARM::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ __ LoadSFromOffset(static_cast<SRegister>(reg_id), SP, stack_index);
+ return kArmWordSize;
+}
+
+CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
+ const ArmInstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
+ : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters,
+ kNumberOfRegisterPairs, compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
- assembler_(true) {}
+ assembler_(true),
+ isa_features_(isa_features) {}
size_t CodeGeneratorARM::FrameEntrySpillSize() const {
return kNumberOfPushedRegistersAtEntry * kArmWordSize;
@@ -499,17 +517,17 @@ void CodeGeneratorARM::GenerateFrameEntry() {
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
if (!skip_overflow_check) {
- if (kExplicitStackOverflowCheck) {
+ if (GetCompilerOptions().GetImplicitStackOverflowChecks()) {
+ __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
+ __ LoadFromOffset(kLoadWord, IP, IP, 0);
+ RecordPcInfo(nullptr, 0);
+ } else {
SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM();
AddSlowPath(slow_path);
__ LoadFromOffset(kLoadWord, IP, TR, Thread::StackEndOffset<kArmWordSize>().Int32Value());
__ cmp(SP, ShifterOperand(IP));
__ b(slow_path->GetEntryLabel(), CC);
- } else {
- __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
- __ LoadFromOffset(kLoadWord, IP, IP, 0);
- RecordPcInfo(nullptr, 0);
}
}
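
Note: the reordered branches select the implicit probe when
GetImplicitStackOverflowChecks() is set. The probe loads from the lowest
address the frame may touch; if that address sits on the stack guard page, the
fault is mapped back to a StackOverflowError using the pc recorded here.
Sketch of the emitted sequence:

    // add IP, SP, #-GetStackOverflowReservedBytes(kArm)
    // ldr IP, [IP, #0]           // faults on the guard page when out of stack
    // RecordPcInfo(nullptr, 0)   // lets the fault handler attribute the pc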
@@ -577,11 +595,17 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
gp_index_ += 2;
stack_index_ += 2;
if (index + 1 < calling_convention.GetNumberOfRegisters()) {
- ArmManagedRegister pair = ArmManagedRegister::FromRegisterPair(
- calling_convention.GetRegisterPairAt(index));
- return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
- } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
- return Location::QuickParameter(index, stack_index);
+ if (calling_convention.GetRegisterAt(index) == R1) {
+ // Skip R1, and use R2_R3 instead.
+ gp_index_++;
+ index++;
+ }
+ }
+ if (index + 1 < calling_convention.GetNumberOfRegisters()) {
+ DCHECK_EQ(calling_convention.GetRegisterAt(index) + 1,
+ calling_convention.GetRegisterAt(index + 1));
+ return Location::RegisterPairLocation(calling_convention.GetRegisterAt(index),
+ calling_convention.GetRegisterAt(index + 1));
} else {
return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
}
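
Note: the rewritten long-argument path drops Location::QuickParameter and
instead skips R1, so a 64-bit value always lands in an even/odd pair that
satisfies ExpectedPairLayout (low register even, high = low + 1), mirroring the
AAPCS rule for doubleword arguments. Illustrative layout for (int, long):

    // arg0: int  -> R0
    // arg1: long -> R1 skipped, pair R2/R3  (never split between R1 and stack)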
@@ -606,9 +630,11 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
uint32_t index = double_index_;
double_index_ += 2;
- return Location::FpuRegisterPairLocation(
+ Location result = Location::FpuRegisterPairLocation(
calling_convention.GetFpuRegisterAt(index),
calling_convention.GetFpuRegisterAt(index + 1));
+ DCHECK(ExpectedPairLayout(result));
+ return result;
} else {
return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
}
@@ -698,27 +724,11 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
} else if (source.IsFpuRegister()) {
UNIMPLEMENTED(FATAL);
- } else if (source.IsQuickParameter()) {
- uint16_t register_index = source.GetQuickParameterRegisterIndex();
- uint16_t stack_index = source.GetQuickParameterStackIndex();
- InvokeDexCallingConvention calling_convention;
- EmitParallelMoves(
- Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
- Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
- Location::StackSlot(
- calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()),
- Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()));
} else {
- // No conflict possible, so just do the moves.
DCHECK(source.IsDoubleStackSlot());
- if (destination.AsRegisterPairLow<Register>() == R1) {
- DCHECK_EQ(destination.AsRegisterPairHigh<Register>(), R2);
- __ LoadFromOffset(kLoadWord, R1, SP, source.GetStackIndex());
- __ LoadFromOffset(kLoadWord, R2, SP, source.GetHighStackIndex(kArmWordSize));
- } else {
- __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(),
- SP, source.GetStackIndex());
- }
+ DCHECK(ExpectedPairLayout(destination));
+ __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(),
+ SP, source.GetStackIndex());
}
} else if (destination.IsFpuRegisterPair()) {
if (source.IsDoubleStackSlot()) {
@@ -728,22 +738,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
} else {
UNIMPLEMENTED(FATAL);
}
- } else if (destination.IsQuickParameter()) {
- InvokeDexCallingConvention calling_convention;
- uint16_t register_index = destination.GetQuickParameterRegisterIndex();
- uint16_t stack_index = destination.GetQuickParameterStackIndex();
- if (source.IsRegisterPair()) {
- UNIMPLEMENTED(FATAL);
- } else if (source.IsFpuRegister()) {
- UNIMPLEMENTED(FATAL);
- } else {
- DCHECK(source.IsDoubleStackSlot());
- EmitParallelMoves(
- Location::StackSlot(source.GetStackIndex()),
- Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
- Location::StackSlot(source.GetHighStackIndex(kArmWordSize)),
- Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
- }
} else {
DCHECK(destination.IsDoubleStackSlot());
if (source.IsRegisterPair()) {
@@ -756,17 +750,6 @@ void CodeGeneratorARM::Move64(Location destination, Location source) {
__ StoreToOffset(kStoreWordPair, source.AsRegisterPairLow<Register>(),
SP, destination.GetStackIndex());
}
- } else if (source.IsQuickParameter()) {
- InvokeDexCallingConvention calling_convention;
- uint16_t register_index = source.GetQuickParameterRegisterIndex();
- uint16_t stack_index = source.GetQuickParameterStackIndex();
- // Just move the low part. The only time a source is a quick parameter is
- // when moving the parameter to its stack locations. And the (Java) caller
- // of this method has already done that.
- __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(register_index),
- SP, destination.GetStackIndex());
- DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(),
- static_cast<size_t>(destination.GetHighStackIndex(kArmWordSize)));
} else if (source.IsFpuRegisterPair()) {
__ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()),
SP,
@@ -799,7 +782,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr
__ LoadImmediate(IP, value);
__ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
}
- } else if (const_to_move->IsLongConstant()) {
+ } else {
+ DCHECK(const_to_move->IsLongConstant()) << const_to_move->DebugName();
int64_t value = const_to_move->AsLongConstant()->GetValue();
if (location.IsRegisterPair()) {
__ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value));
@@ -951,6 +935,7 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
// Condition has not been materialized, use its inputs as the
// comparison and its condition as the branch condition.
LocationSummary* locations = cond->GetLocations();
+ DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0);
Register left = locations->InAt(0).AsRegister<Register>();
if (locations->InAt(1).IsRegister()) {
__ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>()));
@@ -1196,7 +1181,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
// temp = temp[index_in_cache]
__ LoadFromOffset(
- kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()));
+ kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
// LR = temp[offset_of_quick_compiled_code]
__ LoadFromOffset(kLoadWord, LR, temp,
mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
@@ -1240,6 +1225,7 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
} else {
__ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
}
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetMethodAt(method_offset);
uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kArmWordSize).Int32Value();
@@ -1278,6 +1264,7 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke)
} else {
__ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
}
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetImtEntryAt(method_offset);
uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kArmWordSize).Int32Value();
@@ -1296,7 +1283,9 @@ void LocationsBuilderARM::VisitNeg(HNeg* neg) {
switch (neg->GetResultType()) {
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
- bool output_overlaps = (neg->GetResultType() == Primitive::kPrimLong);
+ Location::OutputOverlap output_overlaps = (neg->GetResultType() == Primitive::kPrimLong)
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap;
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), output_overlaps);
break;
@@ -1823,12 +1812,17 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
switch (add->GetResultType()) {
- case Primitive::kPrimInt:
- case Primitive::kPrimLong: {
- bool output_overlaps = (add->GetResultType() == Primitive::kPrimLong);
+ case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), output_overlaps);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
break;
}
@@ -1863,7 +1857,8 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) {
}
break;
- case Primitive::kPrimLong:
+ case Primitive::kPrimLong: {
+ DCHECK(second.IsRegisterPair());
__ adds(out.AsRegisterPairLow<Register>(),
first.AsRegisterPairLow<Register>(),
ShifterOperand(second.AsRegisterPairLow<Register>()));
@@ -1871,6 +1866,7 @@ void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) {
first.AsRegisterPairHigh<Register>(),
ShifterOperand(second.AsRegisterPairHigh<Register>()));
break;
+ }
case Primitive::kPrimFloat:
__ vadds(out.AsFpuRegister<SRegister>(),
@@ -1893,12 +1889,17 @@ void LocationsBuilderARM::VisitSub(HSub* sub) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
switch (sub->GetResultType()) {
- case Primitive::kPrimInt:
- case Primitive::kPrimLong: {
- bool output_overlaps = (sub->GetResultType() == Primitive::kPrimLong);
+ case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), output_overlaps);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
break;
}
case Primitive::kPrimFloat:
@@ -1933,6 +1934,7 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) {
}
case Primitive::kPrimLong: {
+ DCHECK(second.IsRegisterPair());
__ subs(out.AsRegisterPairLow<Register>(),
first.AsRegisterPairLow<Register>(),
ShifterOperand(second.AsRegisterPairLow<Register>()));
@@ -2068,8 +2070,7 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) {
calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
locations->SetInAt(1, Location::RegisterPairLocation(
calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
- // The runtime helper puts the output in R0,R2.
- locations->SetOut(Location::RegisterPairLocation(R0, R2));
+ locations->SetOut(Location::RegisterPairLocation(R0, R1));
break;
}
case Primitive::kPrimFloat:
@@ -2106,7 +2107,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) {
DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
- DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
+ DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc());
break;
@@ -2289,8 +2290,8 @@ void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
locations->SetInAt(0, Location::RegisterPairLocation(
calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
- // The runtime helper puts the output in R0,R2.
- locations->SetOut(Location::RegisterPairLocation(R0, R2));
+ // The runtime helper puts the output in R0,R1.
+ locations->SetOut(Location::RegisterPairLocation(R0, R1));
break;
}
default:
@@ -2344,7 +2345,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegister<Register>());
DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
- DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
+ DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
int32_t entry_point_offset;
if (op->IsShl()) {
@@ -2409,14 +2410,14 @@ void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
locations->SetOut(Location::RegisterLocation(R0));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
}
void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) {
InvokeRuntimeCallingConvention calling_convention;
- codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
+ codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2));
__ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
codegen_->InvokeRuntime(
QUICK_ENTRY_POINT(pAllocArrayWithAccessCheck), instruction, instruction->GetDexPc());
@@ -2556,68 +2557,172 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) {
LOG(FATAL) << "Unreachable";
}
-void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) {
+ // TODO (ported from quick): revisit Arm barrier kinds
+ DmbOptions flavour = DmbOptions::ISH; // quiet c++ warnings
+ switch (kind) {
+ case MemBarrierKind::kAnyStore:
+ case MemBarrierKind::kLoadAny:
+ case MemBarrierKind::kAnyAny: {
+ flavour = DmbOptions::ISH;
+ break;
+ }
+ case MemBarrierKind::kStoreStore: {
+ flavour = DmbOptions::ISHST;
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected memory barrier " << kind;
+ }
+ __ dmb(flavour);
+}
+
+void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr,
+ uint32_t offset,
+ Register out_lo,
+ Register out_hi) {
+ if (offset != 0) {
+ __ LoadImmediate(out_lo, offset);
+ __ add(IP, addr, ShifterOperand(out_lo));
+ addr = IP;
+ }
+ __ ldrexd(out_lo, out_hi, addr);
+}
+
+void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr,
+ uint32_t offset,
+ Register value_lo,
+ Register value_hi,
+ Register temp1,
+ Register temp2,
+ HInstruction* instruction) {
+ Label fail;
+ if (offset != 0) {
+ __ LoadImmediate(temp1, offset);
+ __ add(IP, addr, ShifterOperand(temp1));
+ addr = IP;
+ }
+ __ Bind(&fail);
+ // We need a load followed by store. (The address used in a STREX instruction must
+ // be the same as the address in the most recently executed LDREX instruction.)
+ __ ldrexd(temp1, temp2, addr);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ strexd(temp1, value_lo, value_hi, addr);
+ __ cmp(temp1, ShifterOperand(0));
+ __ b(&fail, NE);
+}
+
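
Note: GenerateWideAtomicStore loops because strexd succeeds only while the
exclusive monitor armed by the matching ldrexd is still held, writing 0 to its
status register on success. A C++ analogue of the retry loop, for intuition
only (load_exclusive_64/store_exclusive_64 are hypothetical helpers, not ART
APIs):

    // Illustrative analogue of the emitted ldrexd/strexd loop.
    uint32_t status;
    do {
      (void)load_exclusive_64(addr);             // ldrexd: arms the monitor
      status = store_exclusive_64(addr, value);  // strexd: 0 iff still exclusive
    } while (status != 0);                       // cmp/bne back to the label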
+void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- bool needs_write_barrier =
- CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue());
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
+
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble;
+ bool generate_volatile = field_info.IsVolatile()
+ && is_wide
+ && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
// Temporary registers for the write barrier.
- if (needs_write_barrier) {
+ // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ } else if (generate_volatile) {
+ // ARM encoding has some additional constraints for ldrexd/strexd:
+ // - registers need to be consecutive
+ // - the first register should be even but not R14.
+ // We don't test for ARM yet, and the assertion makes sure that we revisit this if we ever
+ // enable ARM encoding.
+ DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
+
locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ if (field_type == Primitive::kPrimDouble) {
+ // For doubles we need two more registers to copy the value.
+ locations->AddTemp(Location::RegisterLocation(R2));
+ locations->AddTemp(Location::RegisterLocation(R3));
+ }
}
}
-void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = instruction->GetFieldOffset().Uint32Value();
- Primitive::Type field_type = instruction->GetFieldType();
+ Register base = locations->InAt(0).AsRegister<Register>();
+ Location value = locations->InAt(1);
+
+ bool is_volatile = field_info.IsVolatile();
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+ Primitive::Type field_type = field_info.GetFieldType();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ }
switch (field_type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ StoreToOffset(kStoreByte, value, obj, offset);
+ __ StoreToOffset(kStoreByte, value.AsRegister<Register>(), base, offset);
break;
}
case Primitive::kPrimShort:
case Primitive::kPrimChar: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ StoreToOffset(kStoreHalfword, value, obj, offset);
+ __ StoreToOffset(kStoreHalfword, value.AsRegister<Register>(), base, offset);
break;
}
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ StoreToOffset(kStoreWord, value, obj, offset);
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
- Register temp = locations->GetTemp(0).AsRegister<Register>();
- Register card = locations->GetTemp(1).AsRegister<Register>();
- codegen_->MarkGCCard(temp, card, obj, value);
- }
+ __ StoreToOffset(kStoreWord, value.AsRegister<Register>(), base, offset);
break;
}
case Primitive::kPrimLong: {
- Location value = locations->InAt(1);
- __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset);
+ if (is_volatile && !atomic_ldrd_strd) {
+ GenerateWideAtomicStore(base, offset,
+ value.AsRegisterPairLow<Register>(),
+ value.AsRegisterPairHigh<Register>(),
+ locations->GetTemp(0).AsRegister<Register>(),
+ locations->GetTemp(1).AsRegister<Register>(),
+ instruction);
+ } else {
+ __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), base, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
break;
}
case Primitive::kPrimFloat: {
- SRegister value = locations->InAt(1).AsFpuRegister<SRegister>();
- __ StoreSToOffset(value, obj, offset);
+ __ StoreSToOffset(value.AsFpuRegister<SRegister>(), base, offset);
break;
}
case Primitive::kPrimDouble: {
- DRegister value = FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>());
- __ StoreDToOffset(value, obj, offset);
+ DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>());
+ if (is_volatile && !atomic_ldrd_strd) {
+ Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>();
+ Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>();
+
+ __ vmovrrd(value_reg_lo, value_reg_hi, value_reg);
+
+ GenerateWideAtomicStore(base, offset,
+ value_reg_lo,
+ value_reg_hi,
+ locations->GetTemp(2).AsRegister<Register>(),
+ locations->GetTemp(3).AsRegister<Register>(),
+ instruction);
+ } else {
+ __ StoreDToOffset(value_reg, base, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
break;
}
@@ -2625,75 +2730,160 @@ void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr
LOG(FATAL) << "Unreachable type " << field_type;
UNREACHABLE();
}
+
+ // Longs and doubles are handled in the switch.
+ if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Register card = locations->GetTemp(1).AsRegister<Register>();
+ codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>());
+ }
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
}
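
Note: combined with GenerateMemoryBarrier above, the volatile protocol emitted
by the two Handle* functions is the usual DMB sandwich. Sketch of the
sequences (illustrative, not literal assembler output):

    // Volatile store (this function):
    //   dmb ish         kAnyStore: prior accesses complete before the store
    //   str / strexd    the store (exclusive loop when no atomic 64-bit str)
    //   dmb ish         kAnyAny:   the store completes before later accesses
    //
    // Volatile load (HandleFieldGet below):
    //   ldr / ldrexd    the load
    //   dmb ish         kLoadAny:  the load completes before later accesses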
-void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+
+ bool generate_volatile = field_info.IsVolatile()
+ && (field_info.GetFieldType() == Primitive::kPrimDouble)
+ && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+ if (generate_volatile) {
+ // ARM encoding has some additional constraints for ldrexd/strexd:
+ // - registers need to be consecutive
+ // - the first register should be even but not R14.
+ // We don't test for ARM yet, and the assertion makes sure that we revisit this if we ever
+ // enable ARM encoding.
+ DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
-void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+ Register base = locations->InAt(0).AsRegister<Register>();
+ Location out = locations->Out();
+ bool is_volatile = field_info.IsVolatile();
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+ Primitive::Type field_type = field_info.GetFieldType();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- switch (instruction->GetType()) {
+ switch (field_type) {
case Primitive::kPrimBoolean: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset);
break;
}
case Primitive::kPrimByte: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset);
break;
}
case Primitive::kPrimShort: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset);
break;
}
case Primitive::kPrimChar: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset);
break;
}
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadWord, out, obj, offset);
+ __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
break;
}
case Primitive::kPrimLong: {
- // TODO: support volatile.
- Location out = locations->Out();
- __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset);
+ if (is_volatile && !atomic_ldrd_strd) {
+ GenerateWideAtomicLoad(base, offset,
+ out.AsRegisterPairLow<Register>(),
+ out.AsRegisterPairHigh<Register>());
+ } else {
+ __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset);
+ }
break;
}
case Primitive::kPrimFloat: {
- SRegister out = locations->Out().AsFpuRegister<SRegister>();
- __ LoadSFromOffset(out, obj, offset);
+ __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset);
break;
}
case Primitive::kPrimDouble: {
- DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>());
- __ LoadDFromOffset(out, obj, offset);
+ DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>());
+ if (is_volatile && !atomic_ldrd_strd) {
+ Register lo = locations->GetTemp(0).AsRegister<Register>();
+ Register hi = locations->GetTemp(1).AsRegister<Register>();
+ GenerateWideAtomicLoad(base, offset, lo, hi);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ vmovdrr(out_reg, lo, hi);
+ } else {
+ __ LoadDFromOffset(out_reg, base, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
break;
}
case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << instruction->GetType();
+ LOG(FATAL) << "Unreachable type " << field_type;
UNREACHABLE();
}
+
+ // Doubles are handled in the switch.
+ if (field_type != Primitive::kPrimDouble) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+}
+
+void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
}
void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) {
@@ -2705,20 +2895,32 @@ void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) {
}
}
-void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
+void InstructionCodeGeneratorARM::GenerateImplicitNullCheck(HNullCheck* instruction) {
+ if (codegen_->CanMoveNullCheckToUser(instruction)) {
+ return;
+ }
+ Location obj = instruction->GetLocations()->InAt(0);
+
+ __ LoadFromOffset(kLoadWord, IP, obj.AsRegister<Register>(), 0);
+ codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void InstructionCodeGeneratorARM::GenerateExplicitNullCheck(HNullCheck* instruction) {
SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM(instruction);
codegen_->AddSlowPath(slow_path);
LocationSummary* locations = instruction->GetLocations();
Location obj = locations->InAt(0);
- if (obj.IsRegister()) {
- __ cmp(obj.AsRegister<Register>(), ShifterOperand(0));
- __ b(slow_path->GetEntryLabel(), EQ);
+ __ cmp(obj.AsRegister<Register>(), ShifterOperand(0));
+ __ b(slow_path->GetEntryLabel(), EQ);
+}
+
+void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
+ if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+ GenerateImplicitNullCheck(instruction);
} else {
- DCHECK(obj.IsConstant()) << obj;
- DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
- __ b(slow_path->GetEntryLabel());
+ GenerateExplicitNullCheck(instruction);
}
}
@@ -2822,14 +3024,39 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
break;
}
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
- UNREACHABLE();
+ case Primitive::kPrimFloat: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+ Location out = locations->Out();
+ DCHECK(out.IsFpuRegister());
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+ __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset);
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+ Location out = locations->Out();
+ DCHECK(out.IsFpuRegisterPair());
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+ __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+ __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+ }
+ break;
+ }
+
case Primitive::kPrimVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
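
Note: the new float/double array paths compute the element address as
base + data_offset + (index << log2(element_size)); a constant index folds the
whole offset into the load, otherwise IP holds the scaled base:

    // float  a[i]: addr = obj + DataOffset(sizeof(float))  + (i << TIMES_4)  // i * 4
    // double a[i]: addr = obj + DataOffset(sizeof(double)) + (i << TIMES_8)  // i * 8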
void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) {
@@ -2913,6 +3140,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
__ StoreToOffset(kStoreWord, value, IP, data_offset);
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
if (needs_write_barrier) {
DCHECK_EQ(value_type, Primitive::kPrimNot);
Register temp = locations->GetTemp(0).AsRegister<Register>();
@@ -2942,14 +3170,44 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
break;
}
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
- UNREACHABLE();
+ case Primitive::kPrimFloat: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+ Location value = locations->InAt(2);
+ DCHECK(value.IsFpuRegister());
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ StoreSToOffset(value.AsFpuRegister<SRegister>(), obj, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+ __ StoreSToOffset(value.AsFpuRegister<SRegister>(), IP, data_offset);
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+ Location value = locations->InAt(2);
+ DCHECK(value.IsFpuRegisterPair());
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+ __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+ __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+ }
+
+ break;
+ }
+
case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << instruction->GetType();
+ LOG(FATAL) << "Unreachable type " << value_type;
UNREACHABLE();
}
+
+ // Ints and objects are handled in the switch.
+ if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
}
void LocationsBuilderARM::VisitArrayLength(HArrayLength* instruction) {
@@ -2965,6 +3223,7 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
Register obj = locations->InAt(0).AsRegister<Register>();
Register out = locations->Out().AsRegister<Register>();
__ LoadFromOffset(kLoadWord, out, obj, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -3075,21 +3334,87 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
if (destination.IsRegister()) {
__ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(),
SP, source.GetStackIndex());
+ } else if (destination.IsFpuRegister()) {
+ __ LoadSFromOffset(destination.AsFpuRegister<SRegister>(), SP, source.GetStackIndex());
} else {
DCHECK(destination.IsStackSlot());
__ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
__ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
}
- } else {
- DCHECK(source.IsConstant());
- DCHECK(source.GetConstant()->IsIntConstant());
- int32_t value = source.GetConstant()->AsIntConstant()->GetValue();
- if (destination.IsRegister()) {
- __ LoadImmediate(destination.AsRegister<Register>(), value);
+ } else if (source.IsFpuRegister()) {
+ if (destination.IsFpuRegister()) {
+ __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>());
} else {
DCHECK(destination.IsStackSlot());
- __ LoadImmediate(IP, value);
- __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex());
+ }
+ } else if (source.IsDoubleStackSlot()) {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+ } else {
+ DCHECK(source.IsConstant()) << source;
+ HInstruction* constant = source.GetConstant();
+ if (constant->IsIntConstant()) {
+ int32_t value = constant->AsIntConstant()->GetValue();
+ if (destination.IsRegister()) {
+ __ LoadImmediate(destination.AsRegister<Register>(), value);
+ } else {
+ DCHECK(destination.IsStackSlot());
+ __ LoadImmediate(IP, value);
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ }
+ } else if (constant->IsLongConstant()) {
+ int64_t value = constant->AsLongConstant()->GetValue();
+ if (destination.IsRegister()) {
+ // In the presence of long or double constants, the parallel move resolver will
+ // split the move into two, but keeps the same constant for both moves. Here,
+ // we use the low or high part depending on which register this move goes to.
+ if (destination.reg() % 2 == 0) {
+ __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value));
+ } else {
+ __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value));
+ }
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ LoadImmediate(IP, Low32Bits(value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ __ LoadImmediate(IP, High32Bits(value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+ }
+ } else if (constant->IsDoubleConstant()) {
+ double value = constant->AsDoubleConstant()->GetValue();
+ uint64_t int_value = bit_cast<uint64_t, double>(value);
+ if (destination.IsFpuRegister()) {
+ // In the presence of long or double constants, the parallel move resolver will
+ // split the move into two, but keeps the same constant for both moves. Here,
+ // we use the low or high part depending on which register this move goes to.
+ if (destination.reg() % 2 == 0) {
+ __ LoadSImmediate(destination.AsFpuRegister<SRegister>(),
+ bit_cast<float, uint32_t>(Low32Bits(int_value)));
+ } else {
+ __ LoadSImmediate(destination.AsFpuRegister<SRegister>(),
+ bit_cast<float, uint32_t>(High32Bits(int_value)));
+ }
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ LoadImmediate(IP, Low32Bits(int_value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ __ LoadImmediate(IP, High32Bits(int_value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+ }
+ } else {
+ DCHECK(constant->IsFloatConstant()) << constant->DebugName();
+ float value = constant->AsFloatConstant()->GetValue();
+ if (destination.IsFpuRegister()) {
+ __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), value);
+ } else {
+ DCHECK(destination.IsStackSlot());
+ __ LoadImmediate(IP, bit_cast<int32_t, float>(value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ }
}
}
}
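A minimal standalone sketch of the half-selection logic above for split 64-bit
constant moves; the helper names are assumptions mirroring ART's utils.h, not
part of this patch:

    #include <cstdint>
    #include <cstring>

    // Assumed stand-ins for art::Low32Bits / art::High32Bits.
    static uint32_t Low32Bits(uint64_t value)  { return static_cast<uint32_t>(value); }
    static uint32_t High32Bits(uint64_t value) { return static_cast<uint32_t>(value >> 32); }

    // Each half of a split 64-bit constant move targets one 32-bit register:
    // even register indices receive the low word, odd indices the high word.
    uint32_t HalfFor(int reg_index, uint64_t constant) {
      return (reg_index % 2 == 0) ? Low32Bits(constant) : High32Bits(constant);
    }

    // Doubles take the same path after a bit-level reinterpretation
    // (a portable stand-in for art::bit_cast).
    uint32_t HalfForDouble(int reg_index, double value) {
      uint64_t bits;
      std::memcpy(&bits, &value, sizeof(bits));
      return HalfFor(reg_index, bits);
    }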
@@ -3128,8 +3453,25 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) {
Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsStackSlot()) {
Exchange(source.GetStackIndex(), destination.GetStackIndex());
+ } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+ __ vmovrs(IP, source.AsFpuRegister<SRegister>());
+ __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>());
+ __ vmovsr(destination.AsFpuRegister<SRegister>(), IP);
+ } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
+ SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>()
+ : destination.AsFpuRegister<SRegister>();
+ int mem = source.IsFpuRegister()
+ ? destination.GetStackIndex()
+ : source.GetStackIndex();
+
+ __ vmovrs(IP, reg);
+    __ LoadSFromOffset(reg, SP, mem);
+ __ StoreToOffset(kStoreWord, IP, SP, mem);
+ } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
+ Exchange(source.GetStackIndex(), destination.GetStackIndex());
+ Exchange(source.GetHighStackIndex(kArmWordSize), destination.GetHighStackIndex(kArmWordSize));
} else {
- LOG(FATAL) << "Unimplemented";
+    LOG(FATAL) << "Unimplemented: " << source << " <-> " << destination;
}
}
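Restated as plain C++, the FPU-register/stack-slot case above rotates the value
through a scratch, which is exactly the role IP plays in the emitted code (a
behavioral sketch, not ART code):

    // Swap one float between a register and a stack slot via a scratch value.
    void SwapRegAndSlot(float& fp_reg, float& stack_slot) {
      float scratch = fp_reg;   // vmovrs IP, reg
      fp_reg = stack_slot;      // LoadSFromOffset reg, SP, mem
      stack_slot = scratch;     // StoreToOffset kStoreWord, IP, SP, mem
    }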
@@ -3206,146 +3548,6 @@ void InstructionCodeGeneratorARM::GenerateClassInitializationCheck(
__ Bind(slow_path->GetExitLabel());
}
-void LocationsBuilderARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- Register cls = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = instruction->GetFieldOffset().Uint32Value();
-
- switch (instruction->GetType()) {
- case Primitive::kPrimBoolean: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadUnsignedByte, out, cls, offset);
- break;
- }
-
- case Primitive::kPrimByte: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadSignedByte, out, cls, offset);
- break;
- }
-
- case Primitive::kPrimShort: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadSignedHalfword, out, cls, offset);
- break;
- }
-
- case Primitive::kPrimChar: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadUnsignedHalfword, out, cls, offset);
- break;
- }
-
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- Register out = locations->Out().AsRegister<Register>();
- __ LoadFromOffset(kLoadWord, out, cls, offset);
- break;
- }
-
- case Primitive::kPrimLong: {
- // TODO: support volatile.
- Location out = locations->Out();
- __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), cls, offset);
- break;
- }
-
- case Primitive::kPrimFloat: {
- SRegister out = locations->Out().AsFpuRegister<SRegister>();
- __ LoadSFromOffset(out, cls, offset);
- break;
- }
-
- case Primitive::kPrimDouble: {
- DRegister out = FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>());
- __ LoadDFromOffset(out, cls, offset);
- break;
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << instruction->GetType();
- UNREACHABLE();
- }
-}
-
-void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- bool needs_write_barrier =
- CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue());
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- // Temporary registers for the write barrier.
- if (needs_write_barrier) {
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- }
-}
-
-void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- Register cls = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = instruction->GetFieldOffset().Uint32Value();
- Primitive::Type field_type = instruction->GetFieldType();
-
- switch (field_type) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ StoreToOffset(kStoreByte, value, cls, offset);
- break;
- }
-
- case Primitive::kPrimShort:
- case Primitive::kPrimChar: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ StoreToOffset(kStoreHalfword, value, cls, offset);
- break;
- }
-
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ StoreToOffset(kStoreWord, value, cls, offset);
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
- Register temp = locations->GetTemp(0).AsRegister<Register>();
- Register card = locations->GetTemp(1).AsRegister<Register>();
- codegen_->MarkGCCard(temp, card, cls, value);
- }
- break;
- }
-
- case Primitive::kPrimLong: {
- Location value = locations->InAt(1);
- __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), cls, offset);
- break;
- }
-
- case Primitive::kPrimFloat: {
- SRegister value = locations->InAt(1).AsFpuRegister<SRegister>();
- __ StoreSToOffset(value, cls, offset);
- break;
- }
-
- case Primitive::kPrimDouble: {
- DRegister value = FromLowSToD(locations->InAt(1).AsFpuRegisterPairLow<SRegister>());
- __ StoreDToOffset(value, cls, offset);
- break;
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << field_type;
- UNREACHABLE();
- }
-}
-
void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
@@ -3495,7 +3697,9 @@ void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction)
|| instruction->GetResultType() == Primitive::kPrimLong);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- bool output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong);
+ Location::OutputOverlap output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong)
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap;
locations->SetOut(Location::RequiresRegister(), output_overlaps);
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 226e635d05..0de6669aa7 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -18,6 +18,8 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_
#include "code_generator.h"
+#include "dex/compiler_enums.h"
+#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/arm/assembler_thumb2.h"
@@ -32,7 +34,6 @@ class SlowPathCodeARM;
static constexpr size_t kArmWordSize = kArmPointerSize;
static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
-static constexpr RegisterPair kParameterCorePairRegisters[] = { R1_R2, R2_R3 };
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr SRegister kParameterFpuRegisters[] =
{ S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 };
@@ -46,11 +47,6 @@ class InvokeDexCallingConvention : public CallingConvention<Register, SRegister>
kParameterFpuRegisters,
kParameterFpuRegistersLength) {}
- RegisterPair GetRegisterPairAt(size_t argument_index) {
- DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
- return kParameterCorePairRegisters[argument_index];
- }
-
private:
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
@@ -110,6 +106,8 @@ class LocationsBuilderARM : public HGraphVisitor {
void HandleInvoke(HInvoke* invoke);
void HandleBitwiseOperation(HBinaryOperation* operation);
void HandleShift(HBinaryOperation* operation);
+ void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
CodeGeneratorARM* const codegen_;
InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -138,6 +136,17 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
void GenerateClassInitializationCheck(SlowPathCodeARM* slow_path, Register class_reg);
void HandleBitwiseOperation(HBinaryOperation* operation);
void HandleShift(HBinaryOperation* operation);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+ void GenerateWideAtomicStore(Register addr, uint32_t offset,
+ Register value_lo, Register value_hi,
+ Register temp1, Register temp2,
+ HInstruction* instruction);
+ void GenerateWideAtomicLoad(Register addr, uint32_t offset,
+ Register out_lo, Register out_hi);
+ void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateImplicitNullCheck(HNullCheck* instruction);
+ void GenerateExplicitNullCheck(HNullCheck* instruction);
ArmAssembler* const assembler_;
CodeGeneratorARM* const codegen_;
@@ -147,7 +156,9 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
class CodeGeneratorARM : public CodeGenerator {
public:
- explicit CodeGeneratorARM(HGraph* graph);
+ CodeGeneratorARM(HGraph* graph,
+ const ArmInstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options);
virtual ~CodeGeneratorARM() {}
void GenerateFrameEntry() OVERRIDE;
@@ -156,11 +167,18 @@ class CodeGeneratorARM : public CodeGenerator {
void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
size_t GetWordSize() const OVERRIDE {
return kArmWordSize;
}
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ // Allocated in S registers, which are word sized.
+ return kArmWordSize;
+ }
+
size_t FrameEntrySpillSize() const OVERRIDE;
HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -221,6 +239,14 @@ class CodeGeneratorARM : public CodeGenerator {
block_labels_.SetSize(GetGraph()->GetBlocks().Size());
}
+ const ArmInstructionSetFeatures& GetInstructionSetFeatures() const {
+ return isa_features_;
+ }
+
+ bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
+ return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -228,6 +254,7 @@ class CodeGeneratorARM : public CodeGenerator {
InstructionCodeGeneratorARM instruction_visitor_;
ParallelMoveResolverARM move_resolver_;
Thumb2Assembler assembler_;
+ const ArmInstructionSetFeatures& isa_features_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
};
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c7517d3abc..271eb82ee6 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -17,10 +17,12 @@
#include "code_generator_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/class.h"
+#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
@@ -38,7 +40,9 @@ namespace art {
namespace arm64 {
-static constexpr bool kExplicitStackOverflowCheck = false;
+// TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers.
+// For now we prefer the use of load-acquire, store-release over explicit memory barriers.
+static constexpr bool kUseAcquireRelease = true;
static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
static constexpr int kCurrentMethodStackOffset = 0;
@@ -233,8 +237,9 @@ Location ARM64ReturnLocation(Primitive::Type return_type) {
static const Register kRuntimeParameterCoreRegisters[] = { x0, x1, x2, x3, x4, x5, x6, x7 };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
-static const FPRegister kRuntimeParameterFpuRegisters[] = { };
-static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static const FPRegister kRuntimeParameterFpuRegisters[] = { d0, d1, d2, d3, d4, d5, d6, d7 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+    arraysize(kRuntimeParameterFpuRegisters);
class InvokeRuntimeCallingConvention : public CallingConvention<Register, FPRegister> {
public:
@@ -294,6 +299,7 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
length_location_, LocationFrom(calling_convention.GetRegisterAt(1)));
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc());
+ CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
private:
@@ -313,6 +319,7 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
__ Bind(GetEntryLabel());
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc());
+ CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
private:
@@ -343,6 +350,11 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
: QUICK_ENTRY_POINT(pInitializeType);
arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_);
+ if (do_clinit_) {
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*>();
+ } else {
+ CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t, mirror::ArtMethod*>();
+ }
// Move the class to the desired location.
Location out = locations->Out();
@@ -386,10 +398,11 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
codegen->SaveLiveRegisters(locations);
InvokeRuntimeCallingConvention calling_convention;
- arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0).W());
- __ Mov(calling_convention.GetRegisterAt(1).W(), instruction_->GetStringIndex());
+ arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W());
+ __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex());
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t, mirror::ArtMethod*>();
Primitive::Type type = instruction_->GetType();
arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
@@ -412,6 +425,7 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
__ Bind(GetEntryLabel());
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc());
+ CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
private:
@@ -428,6 +442,7 @@ class StackOverflowCheckSlowPathARM64 : public SlowPathCodeARM64 {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowStackOverflow), nullptr, 0);
+ CheckEntrypointTypes<kQuickThrowStackOverflow, void, void*>();
}
private:
@@ -446,6 +461,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
codegen->SaveLiveRegisters(instruction_->GetLocations());
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc());
+ CheckEntrypointTypes<kQuickTestSuspend, void, void>();
codegen->RestoreLiveRegisters(instruction_->GetLocations());
if (successor_ == nullptr) {
__ B(GetReturnLabel());
@@ -502,9 +518,12 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
+ CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
+ const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_);
+ CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
codegen->RestoreLiveRegisters(locations);
@@ -543,11 +562,12 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
return next_location;
}
-CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph)
+CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfAllocatableRegisters,
kNumberOfAllocatableFPRegisters,
- kNumberOfAllocatableRegisterPairs),
+ kNumberOfAllocatableRegisterPairs,
+ compiler_options),
block_labels_(nullptr),
location_builder_(graph, this),
instruction_visitor_(graph, this),
@@ -585,17 +605,17 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
if (do_overflow_check) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireX();
- if (kExplicitStackOverflowCheck) {
+ if (GetCompilerOptions().GetImplicitStackOverflowChecks()) {
+ __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
+ __ Ldr(wzr, MemOperand(temp, 0));
+ RecordPcInfo(nullptr, 0);
+ } else {
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM64();
AddSlowPath(slow_path);
__ Ldr(temp, MemOperand(tr, Thread::StackEndOffset<kArm64WordSize>().Int32Value()));
__ Cmp(sp, temp);
__ B(lo, slow_path->GetEntryLabel());
- } else {
- __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
- __ Ldr(wzr, MemOperand(temp, 0));
- RecordPcInfo(nullptr, 0);
}
}
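The implicit variant above relies on a protected guard region below the usable
stack: touching it faults, and a fault handler converts the signal into a
StackOverflowError at the recorded pc. A hedged sketch of the idea (the
constant is an assumption for illustration; the code above queries
GetStackOverflowReservedBytes(kArm64)):

    #include <cstddef>

    constexpr size_t kReservedBytes = 8 * 1024;  // assumed guard size

    // Equivalent of 'Ldr wzr, [sp - reserved]': a discarded load that faults
    // if and only if the thread has run into the guard region.
    void ProbeStack(const char* stack_pointer) {
      volatile char probe = *(stack_pointer - kReservedBytes);
      (void)probe;
    }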
@@ -949,8 +969,8 @@ void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) {
}
void CodeGeneratorARM64::Load(Primitive::Type type,
- vixl::CPURegister dst,
- const vixl::MemOperand& src) {
+ CPURegister dst,
+ const MemOperand& src) {
switch (type) {
case Primitive::kPrimBoolean:
__ Ldrb(Register(dst), src);
@@ -969,7 +989,7 @@ void CodeGeneratorARM64::Load(Primitive::Type type,
case Primitive::kPrimLong:
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- DCHECK(dst.Is64Bits() == Is64BitType(type));
+ DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
__ Ldr(dst, src);
break;
case Primitive::kPrimVoid:
@@ -977,31 +997,130 @@ void CodeGeneratorARM64::Load(Primitive::Type type,
}
}
+void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
+ CPURegister dst,
+ const MemOperand& src) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp_base = temps.AcquireX();
+ Primitive::Type type = instruction->GetType();
+
+ DCHECK(!src.IsRegisterOffset());
+ DCHECK(!src.IsPreIndex());
+ DCHECK(!src.IsPostIndex());
+
+ // TODO(vixl): Let the MacroAssembler handle MemOperand.
+ __ Add(temp_base, src.base(), src.offset());
+ MemOperand base = MemOperand(temp_base);
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ __ Ldarb(Register(dst), base);
+ MaybeRecordImplicitNullCheck(instruction);
+ break;
+ case Primitive::kPrimByte:
+ __ Ldarb(Register(dst), base);
+ MaybeRecordImplicitNullCheck(instruction);
+ __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+ break;
+ case Primitive::kPrimChar:
+ __ Ldarh(Register(dst), base);
+ MaybeRecordImplicitNullCheck(instruction);
+ break;
+ case Primitive::kPrimShort:
+ __ Ldarh(Register(dst), base);
+ MaybeRecordImplicitNullCheck(instruction);
+ __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot:
+ case Primitive::kPrimLong:
+ DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
+ __ Ldar(Register(dst), base);
+ MaybeRecordImplicitNullCheck(instruction);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ DCHECK(dst.IsFPRegister());
+ DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
+
+ Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+ __ Ldar(temp, base);
+ MaybeRecordImplicitNullCheck(instruction);
+ __ Fmov(FPRegister(dst), temp);
+ break;
+ }
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ }
+}
+
void CodeGeneratorARM64::Store(Primitive::Type type,
- vixl::CPURegister rt,
- const vixl::MemOperand& dst) {
+ CPURegister src,
+ const MemOperand& dst) {
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- __ Strb(Register(rt), dst);
+ __ Strb(Register(src), dst);
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- __ Strh(Register(rt), dst);
+ __ Strh(Register(src), dst);
break;
case Primitive::kPrimInt:
case Primitive::kPrimNot:
case Primitive::kPrimLong:
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- DCHECK(rt.Is64Bits() == Is64BitType(type));
- __ Str(rt, dst);
+ DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+ __ Str(src, dst);
break;
case Primitive::kPrimVoid:
LOG(FATAL) << "Unreachable type " << type;
}
}
+void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
+ CPURegister src,
+ const MemOperand& dst) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp_base = temps.AcquireX();
+
+ DCHECK(!dst.IsRegisterOffset());
+ DCHECK(!dst.IsPreIndex());
+ DCHECK(!dst.IsPostIndex());
+
+ // TODO(vixl): Let the MacroAssembler handle this.
+ __ Add(temp_base, dst.base(), dst.offset());
+ MemOperand base = MemOperand(temp_base);
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ __ Stlrb(Register(src), base);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ __ Stlrh(Register(src), base);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot:
+ case Primitive::kPrimLong:
+ DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+ __ Stlr(Register(src), base);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ DCHECK(src.IsFPRegister());
+ DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+
+ Register temp = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+ __ Fmov(temp, FPRegister(src));
+ __ Stlr(temp, base);
+ break;
+ }
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ }
+}
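For readers less familiar with ARMv8, the Ldar/Stlr instructions used by
LoadAcquire/StoreRelease correspond roughly to C++11 acquire/release atomics;
pairing all volatile accesses through them yields the sequentially consistent
behavior Java volatiles need on ARMv8. An analogy only, not ART code:

    #include <atomic>
    #include <cstdint>

    int32_t VolatileLoad(const std::atomic<int32_t>& field) {
      return field.load(std::memory_order_acquire);    // ~ Ldar
    }

    void VolatileStore(std::atomic<int32_t>& field, int32_t value) {
      field.store(value, std::memory_order_release);   // ~ Stlr
    }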
+
void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) {
DCHECK(current_method.IsW());
__ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
@@ -1026,14 +1145,47 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
vixl::Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireW();
- __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset()));
- __ Cmp(temp, mirror::Class::kStatusInitialized);
- __ B(lt, slow_path->GetEntryLabel());
+ size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
- __ Dmb(InnerShareable, BarrierReads);
+ if (kUseAcquireRelease) {
+ // TODO(vixl): Let the MacroAssembler handle MemOperand.
+ __ Add(temp, class_reg, status_offset);
+ __ Ldar(temp, HeapOperand(temp));
+ __ Cmp(temp, mirror::Class::kStatusInitialized);
+ __ B(lt, slow_path->GetEntryLabel());
+ } else {
+ __ Ldr(temp, HeapOperand(class_reg, status_offset));
+ __ Cmp(temp, mirror::Class::kStatusInitialized);
+ __ B(lt, slow_path->GetEntryLabel());
+ __ Dmb(InnerShareable, BarrierReads);
+ }
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
+ BarrierType type = BarrierAll;
+
+ switch (kind) {
+ case MemBarrierKind::kAnyAny:
+ case MemBarrierKind::kAnyStore: {
+ type = BarrierAll;
+ break;
+ }
+ case MemBarrierKind::kLoadAny: {
+ type = BarrierReads;
+ break;
+ }
+ case MemBarrierKind::kStoreStore: {
+ type = BarrierWrites;
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected memory barrier " << kind;
+ }
+ __ Dmb(InnerShareable, type);
+}
+
void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathARM64* slow_path =
@@ -1254,6 +1406,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
}
codegen_->Load(type, OutputCPURegister(instruction), source);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
@@ -1265,6 +1418,7 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
__ Ldr(OutputRegister(instruction),
HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset()));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
@@ -1288,7 +1442,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
Primitive::Type value_type = instruction->GetComponentType();
if (value_type == Primitive::kPrimNot) {
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc());
-
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
} else {
LocationSummary* locations = instruction->GetLocations();
Register obj = InputRegisterAt(instruction, 0);
@@ -1309,6 +1463,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
}
codegen_->Store(value_type, value, destination);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -1660,28 +1815,60 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
}
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
- codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+
+ if (instruction->IsVolatile()) {
+ if (kUseAcquireRelease) {
+ // NB: LoadAcquire will record the pc info if needed.
+ codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
+ } else {
+ codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // For IRIW sequential consistency kLoadAny is not sufficient.
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
}
void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- Primitive::Type field_type = instruction->GetFieldType();
- CPURegister value = InputCPURegisterAt(instruction, 1);
Register obj = InputRegisterAt(instruction, 0);
- codegen_->Store(field_type, value, HeapOperand(obj, instruction->GetFieldOffset()));
- if (field_type == Primitive::kPrimNot) {
+ CPURegister value = InputCPURegisterAt(instruction, 1);
+ Offset offset = instruction->GetFieldOffset();
+ Primitive::Type field_type = instruction->GetFieldType();
+
+ if (instruction->IsVolatile()) {
+ if (kUseAcquireRelease) {
+ codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ } else {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->Store(field_type, value, HeapOperand(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Store(field_type, value, HeapOperand(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
codegen_->MarkGCCard(obj, Register(value));
}
}
@@ -1692,7 +1879,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), true); // The output does overlap inputs.
+ // The output does overlap inputs.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}
void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -1782,6 +1970,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
} else {
__ Ldr(temp, HeapOperandFrom(receiver, class_offset));
}
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetImtEntryAt(method_offset);
__ Ldr(temp, HeapOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
@@ -1805,7 +1994,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
// Make sure that ArtMethod* is passed in W0 as per the calling convention
DCHECK(temp.Is(w0));
size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() +
- invoke->GetIndexInDexCache() * kHeapRefSize;
+ invoke->GetDexMethodIndex() * kHeapRefSize;
// TODO: Implement all kinds of calls:
// 1) boot -> boot
@@ -1847,6 +2036,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
DCHECK(receiver.IsRegister());
__ Ldr(temp, HeapOperandFrom(receiver, class_offset));
}
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetMethodAt(method_offset);
__ Ldr(temp, HeapOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
@@ -1959,6 +2149,7 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins
? QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject),
instruction,
instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
}
void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -2044,9 +2235,11 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
locations->SetOut(LocationFrom(x0));
- locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(2)));
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+ CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
+ void*, uint32_t, int32_t, mirror::ArtMethod*>();
}
void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
@@ -2055,11 +2248,13 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
DCHECK(type_index.Is(w0));
Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot);
- DCHECK(current_method.Is(w1));
+ DCHECK(current_method.Is(w2));
codegen_->LoadCurrentMethod(current_method);
__ Mov(type_index, instruction->GetTypeIndex());
codegen_->InvokeRuntime(
QUICK_ENTRY_POINT(pAllocArrayWithAccessCheck), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
+ void*, uint32_t, int32_t, mirror::ArtMethod*>();
}
void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
@@ -2069,6 +2264,7 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+ CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>();
}
void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
@@ -2081,6 +2277,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction)
__ Mov(type_index, instruction->GetTypeIndex());
codegen_->InvokeRuntime(
QUICK_ENTRY_POINT(pAllocObjectWithAccessCheck), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>();
}
void LocationsBuilderARM64::VisitNot(HNot* instruction) {
@@ -2114,18 +2311,31 @@ void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
}
}
-void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
+void InstructionCodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
+ if (codegen_->CanMoveNullCheckToUser(instruction)) {
+ return;
+ }
+ Location obj = instruction->GetLocations()->InAt(0);
+
+ __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
+ codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void InstructionCodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction);
codegen_->AddSlowPath(slow_path);
LocationSummary* locations = instruction->GetLocations();
Location obj = locations->InAt(0);
- if (obj.IsRegister()) {
- __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
+
+ __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
+}
+
+void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
+ if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+ GenerateImplicitNullCheck(instruction);
} else {
- DCHECK(obj.IsConstant()) << obj;
- DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
- __ B(slow_path->GetEntryLabel());
+ GenerateExplicitNullCheck(instruction);
}
}
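A sketch of what the implicit path relies on: loading through the object
pointer itself makes a null receiver fault, and the runtime maps the faulting
pc (recorded via RecordPcInfo) back to this check to raise the
NullPointerException. Illustration only, not ART's fault handler:

    #include <cstdint>

    // Equivalent of 'Ldr wzr, [obj]': a discarded load that faults iff obj
    // is null, with no explicit compare-and-branch on the fast path.
    void ImplicitNullCheck(const int32_t* obj) {
      volatile int32_t probe = *obj;
      (void)probe;
    }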
@@ -2175,9 +2385,12 @@ void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction) {
}
void LocationsBuilderARM64::VisitRem(HRem* rem) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
- switch (rem->GetResultType()) {
+ Primitive::Type type = rem->GetResultType();
+ LocationSummary::CallKind call_kind = IsFPType(type) ? LocationSummary::kCall
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+ switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
@@ -2185,13 +2398,24 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) {
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(type));
+
+ break;
+ }
+
default:
- LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+ LOG(FATAL) << "Unexpected rem type " << type;
}
}
void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
+
switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
@@ -2206,6 +2430,14 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
break;
}
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
+ : QUICK_ENTRY_POINT(pFmod);
+ codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc());
+ break;
+ }
+
default:
LOG(FATAL) << "Unexpected rem type " << type;
}
@@ -2294,7 +2526,19 @@ void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
- codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+
+ if (instruction->IsVolatile()) {
+ if (kUseAcquireRelease) {
+ // NB: LoadAcquire will record the pc info if needed.
+ codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
+ } else {
+ codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+ // For IRIW sequential consistency kLoadAny is not sufficient.
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+ }
}
void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
@@ -2305,13 +2549,24 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
}
void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- CPURegister value = InputCPURegisterAt(instruction, 1);
Register cls = InputRegisterAt(instruction, 0);
+ CPURegister value = InputCPURegisterAt(instruction, 1);
Offset offset = instruction->GetFieldOffset();
Primitive::Type field_type = instruction->GetFieldType();
- codegen_->Store(field_type, value, HeapOperand(cls, offset));
- if (field_type == Primitive::kPrimNot) {
+ if (instruction->IsVolatile()) {
+ if (kUseAcquireRelease) {
+ codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset));
+ } else {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->Store(field_type, value, HeapOperand(cls, offset));
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ codegen_->Store(field_type, value, HeapOperand(cls, offset));
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
codegen_->MarkGCCard(cls, Register(value));
}
}
@@ -2353,6 +2608,7 @@ void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
codegen_->InvokeRuntime(
QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 1d5bfb734e..27c6fbdbf4 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -18,6 +18,8 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
#include "code_generator.h"
+#include "dex/compiler_enums.h"
+#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/arm64/assembler_arm64.h"
@@ -108,9 +110,12 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor {
private:
void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* instr);
void HandleShift(HBinaryOperation* instr);
+ void GenerateImplicitNullCheck(HNullCheck* instruction);
+ void GenerateExplicitNullCheck(HNullCheck* instruction);
Arm64Assembler* const assembler_;
CodeGeneratorARM64* const codegen_;
@@ -162,7 +167,7 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolver {
class CodeGeneratorARM64 : public CodeGenerator {
public:
- explicit CodeGeneratorARM64(HGraph* graph);
+ CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options);
virtual ~CodeGeneratorARM64() {}
void GenerateFrameEntry() OVERRIDE;
@@ -189,6 +194,11 @@ class CodeGeneratorARM64 : public CodeGenerator {
return kArm64WordSize;
}
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ // Allocated in D registers, which are word sized.
+ return kArm64WordSize;
+ }
+
uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
vixl::Label* block_entry_label = GetLabelOf(block);
DCHECK(block_entry_label->IsBound());
@@ -257,12 +267,18 @@ class CodeGeneratorARM64 : public CodeGenerator {
void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
void LoadCurrentMethod(vixl::Register current_method);
+ void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src);
+ void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
// Generate code to invoke a runtime entry point.
void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc);
ParallelMoveResolverARM64* GetMoveResolver() { return &move_resolver_; }
+ bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ return false;
+ }
+
private:
// Labels for each block that will be compiled.
vixl::Label* block_labels_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index e7edd8a805..ac6fdbcfe9 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -31,8 +31,6 @@ namespace art {
namespace x86 {
-static constexpr bool kExplicitStackOverflowCheck = false;
-
static constexpr int kNumberOfPushedRegistersAtEntry = 1;
static constexpr int kCurrentMethodStackOffset = 0;
@@ -42,6 +40,8 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength =
static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static constexpr int kC2ConditionMask = 0x400;
+
// Marker for places that can be updated once we don't follow the quick ABI.
static constexpr bool kFollowsQuickABI = true;
@@ -215,8 +215,8 @@ class LoadStringSlowPathX86 : public SlowPathCodeX86 {
codegen->SaveLiveRegisters(locations);
InvokeRuntimeCallingConvention calling_convention;
- x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0));
- __ movl(calling_convention.GetRegisterAt(1), Immediate(instruction_->GetStringIndex()));
+ x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
+ __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex()));
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString)));
codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
@@ -373,8 +373,9 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
return kX86WordSize;
}
-CodeGeneratorX86::CodeGeneratorX86(HGraph* graph)
- : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, kNumberOfRegisterPairs),
+CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
+ : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters,
+ kNumberOfRegisterPairs, compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
@@ -469,7 +470,9 @@ void CodeGeneratorX86::GenerateFrameEntry() {
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
- if (!skip_overflow_check && !kExplicitStackOverflowCheck) {
+ bool implicitStackOverflowChecks = GetCompilerOptions().GetImplicitStackOverflowChecks();
+
+ if (!skip_overflow_check && implicitStackOverflowChecks) {
__ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
RecordPcInfo(nullptr, 0);
}
@@ -477,7 +480,7 @@ void CodeGeneratorX86::GenerateFrameEntry() {
// The return PC has already been pushed on the stack.
__ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
- if (!skip_overflow_check && kExplicitStackOverflowCheck) {
+ if (!skip_overflow_check && !implicitStackOverflowChecks) {
SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86();
AddSlowPath(slow_path);
@@ -643,9 +646,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
DCHECK(source.IsDoubleStackSlot());
EmitParallelMoves(
Location::StackSlot(source.GetStackIndex()),
- Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+ Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)),
Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
+ __ movl(calling_convention.GetRegisterAt(register_index), Address(ESP, source.GetStackIndex()));
}
} else if (destination.IsFpuRegister()) {
if (source.IsDoubleStackSlot()) {
@@ -1134,7 +1138,7 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
// temp = temp->dex_cache_resolved_methods_;
__ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
// temp = temp[index_in_cache]
- __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
+ __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
// (temp + offset_of_quick_compiled_code)()
__ call(Address(
temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
@@ -1198,6 +1202,7 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
} else {
__ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
}
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetMethodAt(method_offset);
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
@@ -1234,6 +1239,7 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke)
} else {
__ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
}
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetImtEntryAt(method_offset);
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
@@ -2074,6 +2080,81 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
}
}
+void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset,
+ uint32_t stack_adjustment, bool is_float) {
+ if (source.IsStackSlot()) {
+ DCHECK(is_float);
+ __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ } else if (source.IsDoubleStackSlot()) {
+ DCHECK(!is_float);
+ __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ } else {
+ // Write the value to the temporary location on the stack and load to FP stack.
+ if (is_float) {
+ Location stack_temp = Location::StackSlot(temp_offset);
+ codegen_->Move32(stack_temp, source);
+ __ flds(Address(ESP, temp_offset));
+ } else {
+ Location stack_temp = Location::DoubleStackSlot(temp_offset);
+ codegen_->Move64(stack_temp, source);
+ __ fldl(Address(ESP, temp_offset));
+ }
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
+ Primitive::Type type = rem->GetResultType();
+ bool is_float = type == Primitive::kPrimFloat;
+ size_t elem_size = Primitive::ComponentSize(type);
+ LocationSummary* locations = rem->GetLocations();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+ Location out = locations->Out();
+
+ // Create stack space for 2 elements.
+ // TODO: enhance register allocator to ask for stack temporaries.
+ __ subl(ESP, Immediate(2 * elem_size));
+
+ // Load the values to the FP stack in reverse order, using temporaries if needed.
+ PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
+ PushOntoFPStack(first, 0, 2 * elem_size, is_float);
+
+ // Loop doing FPREM until we stabilize.
+ Label retry;
+ __ Bind(&retry);
+ __ fprem();
+
+ // Move FP status to AX.
+ __ fstsw();
+
+  // And see if the argument reduction is complete: fprem leaves the C2 FPU
+  // status bit set while further reduction steps remain, and clears it when done.
+ __ andl(EAX, Immediate(kC2ConditionMask));
+ __ j(kNotEqual, &retry);
+
+ // We have settled on the final value. Retrieve it into an XMM register.
+ // Store FP top of stack to real stack.
+ if (is_float) {
+ __ fsts(Address(ESP, 0));
+ } else {
+ __ fstl(Address(ESP, 0));
+ }
+
+ // Pop the 2 items from the FP stack.
+ __ fucompp();
+
+ // Load the value from the stack into an XMM register.
+ DCHECK(out.IsFpuRegister()) << out;
+ if (is_float) {
+ __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+ } else {
+ __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+ }
+
+ // And remove the temporary stack space we allocated.
+ __ addl(ESP, Immediate(2 * elem_size));
+}
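Functionally, the fprem loop computes x87 partial remainders until the C2
status bit reports the reduction is complete; the converged result is what the
C library's fmod returns. A behavioral restatement, not the emitted code:

    #include <cmath>

    double RemFP64(double dividend, double divisor) {
      return std::fmod(dividend, divisor);  // what the fprem loop converges to
    }

    float RemFP32(float dividend, float divisor) {
      return std::fmod(dividend, divisor);  // float overload, same semantics
    }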
+
void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
@@ -2207,10 +2288,8 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
void LocationsBuilderX86::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
- LocationSummary::CallKind call_kind = type == Primitive::kPrimInt
- ? LocationSummary::kNoCall
- : LocationSummary::kCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
switch (type) {
case Primitive::kPrimInt: {
@@ -2229,24 +2308,12 @@ void LocationsBuilderX86::VisitRem(HRem* rem) {
locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
break;
}
+ case Primitive::kPrimDouble:
case Primitive::kPrimFloat: {
- InvokeRuntimeCallingConvention calling_convention;
- // x86 floating-point parameters are passed through core registers (EAX, ECX).
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- // The runtime helper puts the result in XMM0.
- locations->SetOut(Location::FpuRegisterLocation(XMM0));
- break;
- }
- case Primitive::kPrimDouble: {
- InvokeRuntimeCallingConvention calling_convention;
- // x86 floating-point parameters are passed through core registers (EAX_ECX, EDX_EBX).
- locations->SetInAt(0, Location::RegisterPairLocation(
- calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
- locations->SetInAt(1, Location::RegisterPairLocation(
- calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
- // The runtime helper puts the result in XMM0.
- locations->SetOut(Location::FpuRegisterLocation(XMM0));
+ locations->SetInAt(0, Location::Any());
+ locations->SetInAt(1, Location::Any());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RegisterLocation(EAX));
break;
}
@@ -2263,14 +2330,9 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
GenerateDivRemIntegral(rem);
break;
}
- case Primitive::kPrimFloat: {
- __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmodf)));
- codegen_->RecordPcInfo(rem, rem->GetDexPc());
- break;
- }
+ case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
- __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmod)));
- codegen_->RecordPcInfo(rem, rem->GetDexPc());
+ GenerateRemFP(rem);
break;
}
default:
@@ -2503,13 +2565,13 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
locations->SetOut(Location::RegisterLocation(EAX));
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
}
void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
InvokeRuntimeCallingConvention calling_convention;
- codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
+ codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2));
__ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
__ fs()->call(
@@ -2656,82 +2718,117 @@ void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction) {
LOG(FATAL) << "Unreachable";
}
-void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
+ /*
+   * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need a memory fence.
+ * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+ * For those cases, all we need to ensure is that there is a scheduling barrier in place.
+ */
+ switch (kind) {
+ case MemBarrierKind::kAnyAny: {
+ __ mfence();
+ break;
+ }
+ case MemBarrierKind::kAnyStore:
+ case MemBarrierKind::kLoadAny:
+ case MemBarrierKind::kStoreStore: {
+ // nop
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected memory barrier " << kind;
+ }
+}
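The same JSR-133 mapping in portable terms (a sketch; on x86 a seq_cst thread
fence is what compiles down to the mfence-class barrier):

    #include <atomic>

    // Only the StoreLoad/AnyAny case needs a hardware fence on x86; the other
    // kinds only have to keep the compiler from reordering.
    void EmitBarrier(bool any_any) {
      if (any_any) {
        std::atomic_thread_fence(std::memory_order_seq_cst);  // mfence or a locked op
      } else {
        std::atomic_signal_fence(std::memory_order_seq_cst);  // compiler-only barrier
      }
    }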
+
+
+void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) {
+ Label is_null;
+ __ testl(value, value);
+ __ j(kEqual, &is_null);
+ __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value()));
+ __ movl(temp, object);
+ __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
+ __ movb(Address(temp, card, TIMES_1, 0),
+ X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
+ __ Bind(&is_null);
+}
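The shift-and-store sequence above is the classic card-table write barrier.
A hedged sketch of the arithmetic (the shift width and the biased-base trick
are assumptions about ART's CardTable, used here only for illustration):

    #include <cstdint>

    constexpr unsigned kCardShift = 7;  // assumption: 128-byte cards

    // Dirty the card covering 'object'. Storing the low byte of the (biased)
    // card-table base is the same trick as the movb above: the dirty value
    // needs no separate immediate.
    void MarkCard(uint8_t* biased_card_base, uintptr_t object) {
      biased_card_base[object >> kCardShift] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_card_base));
    }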
+
+void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- Primitive::Type field_type = instruction->GetFieldType();
- bool needs_write_barrier =
- CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- bool is_byte_type = (field_type == Primitive::kPrimBoolean)
- || (field_type == Primitive::kPrimByte);
- // The register allocator does not support multiple
- // inputs that die at entry with one in a specific register.
- if (is_byte_type) {
- // Ensure the value is in a byte register.
- locations->SetInAt(1, Location::RegisterLocation(EAX));
- } else {
- locations->SetInAt(1, Location::RequiresRegister());
- }
- // Temporary registers for the write barrier.
- if (needs_write_barrier) {
- locations->AddTemp(Location::RequiresRegister());
- // Ensure the card is in a byte register.
- locations->AddTemp(Location::RegisterLocation(ECX));
+ if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
+ // Long values can be loaded atomically into an XMM using movsd.
+    // So we use an XMM register as a temp to achieve atomicity (first load the field into the
+    // XMM, then copy the XMM into the output 32 bits at a time).
+ locations->AddTemp(Location::RequiresFpuRegister());
}
}
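A sketch of the movsd trick described in the comment, written with SSE2
intrinsics (assumes an 8-byte-aligned field; it mirrors the movsd/movd/psrlq
sequence the code generator emits further down for volatile longs):

    #include <cstdint>
    #include <emmintrin.h>  // SSE2

    void AtomicLoad64(const void* field, uint32_t* lo, uint32_t* hi) {
      // One 64-bit load through an XMM register: atomic on x86 when aligned.
      __m128i v = _mm_loadl_epi64(static_cast<const __m128i*>(field));
      *lo = static_cast<uint32_t>(_mm_cvtsi128_si32(v));   // movd
      v = _mm_srli_epi64(v, 32);                           // psrlq $32
      *hi = static_cast<uint32_t>(_mm_cvtsi128_si32(v));   // movd
    }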
-void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = instruction->GetFieldOffset().Uint32Value();
- Primitive::Type field_type = instruction->GetFieldType();
+ Register base = locations->InAt(0).AsRegister<Register>();
+ Location out = locations->Out();
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
switch (field_type) {
- case Primitive::kPrimBoolean:
+ case Primitive::kPrimBoolean: {
+ __ movzxb(out.AsRegister<Register>(), Address(base, offset));
+ break;
+ }
+
case Primitive::kPrimByte: {
- ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>();
- __ movb(Address(obj, offset), value);
+ __ movsxb(out.AsRegister<Register>(), Address(base, offset));
+ break;
+ }
+
+ case Primitive::kPrimShort: {
+ __ movsxw(out.AsRegister<Register>(), Address(base, offset));
break;
}
- case Primitive::kPrimShort:
case Primitive::kPrimChar: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ movw(Address(obj, offset), value);
+ __ movzxw(out.AsRegister<Register>(), Address(base, offset));
break;
}
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ movl(Address(obj, offset), value);
-
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
- Register temp = locations->GetTemp(0).AsRegister<Register>();
- Register card = locations->GetTemp(1).AsRegister<Register>();
- codegen_->MarkGCCard(temp, card, obj, value);
- }
+ __ movl(out.AsRegister<Register>(), Address(base, offset));
break;
}
case Primitive::kPrimLong: {
- Location value = locations->InAt(1);
- __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>());
- __ movl(Address(obj, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
+ if (is_volatile) {
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movsd(temp, Address(base, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ movd(out.AsRegisterPairLow<Register>(), temp);
+ __ psrlq(temp, Immediate(32));
+ __ movd(out.AsRegisterPairHigh<Register>(), temp);
+ } else {
+ __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
+ }
break;
}
case Primitive::kPrimFloat: {
- XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
- __ movss(Address(obj, offset), value);
+ __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
break;
}
case Primitive::kPrimDouble: {
- XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
- __ movsd(Address(obj, offset), value);
+ __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
break;
}
@@ -2739,99 +2836,190 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr
LOG(FATAL) << "Unreachable type " << field_type;
UNREACHABLE();
}
-}
-void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) {
- Label is_null;
- __ testl(value, value);
- __ j(kEqual, &is_null);
- __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value()));
- __ movl(temp, object);
- __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
- __ movb(Address(temp, card, TIMES_1, 0),
- X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
- __ Bind(&is_null);
+ // Longs are handled in the switch.
+ if (field_type != Primitive::kPrimLong) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
}
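Annotated sketch of the volatile-long load path above, in the same assembler DSL (register names are placeholders):

    __ movsd(xmm_temp, Address(base, offset));     // single 8-byte SSE2 load => atomic on x86
    __ movd(out_lo, xmm_temp);                     // bits 31..0 into the low pair register
    __ psrlq(xmm_temp, Immediate(32));             // shift the high half down
    __ movd(out_hi, xmm_temp);                     // bits 63..32 into the high pair register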
-void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool is_byte_type = (field_type == Primitive::kPrimBoolean)
+ || (field_type == Primitive::kPrimByte);
+
+ // The register allocator does not support multiple
+ // inputs that die at entry with one in a specific register.
+ if (is_byte_type) {
+ // Ensure the value is in a byte register.
+ locations->SetInAt(1, Location::RegisterLocation(EAX));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ // Temporary registers for the write barrier.
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ locations->AddTemp(Location::RequiresRegister());
+ // Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ } else if (is_volatile && (field_type == Primitive::kPrimLong)) {
+ // A 64-bit value can be atomically written to an address with movsd and an XMM register.
+ // We need two XMM registers because there's no easier way to (bit) copy a register pair
+ // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
+ // NB: We could make the register allocator understand fp_reg <-> core_reg moves, but given
+ // the isolated cases where we need this, it isn't worth the extra complexity.
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
}
-void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+ Register base = locations->InAt(0).AsRegister<Register>();
+ Location value = locations->InAt(1);
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- switch (instruction->GetType()) {
- case Primitive::kPrimBoolean: {
- Register out = locations->Out().AsRegister<Register>();
- __ movzxb(out, Address(obj, offset));
- break;
- }
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ }
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
- Register out = locations->Out().AsRegister<Register>();
- __ movsxb(out, Address(obj, offset));
- break;
- }
-
- case Primitive::kPrimShort: {
- Register out = locations->Out().AsRegister<Register>();
- __ movsxw(out, Address(obj, offset));
+ __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
break;
}
+ case Primitive::kPrimShort:
case Primitive::kPrimChar: {
- Register out = locations->Out().AsRegister<Register>();
- __ movzxw(out, Address(obj, offset));
+ __ movw(Address(base, offset), value.AsRegister<Register>());
break;
}
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- Register out = locations->Out().AsRegister<Register>();
- __ movl(out, Address(obj, offset));
+ __ movl(Address(base, offset), value.AsRegister<Register>());
break;
}
case Primitive::kPrimLong: {
- // TODO: support volatile.
- __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(obj, offset));
- __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(obj, kX86WordSize + offset));
+ if (is_volatile) {
+ XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ __ movd(temp1, value.AsRegisterPairLow<Register>());
+ __ movd(temp2, value.AsRegisterPairHigh<Register>());
+ __ punpckldq(temp1, temp2);
+ __ movsd(Address(base, offset), temp1);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ } else {
+ __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
+ }
break;
}
case Primitive::kPrimFloat: {
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
- __ movss(out, Address(obj, offset));
+ __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
break;
}
case Primitive::kPrimDouble: {
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
- __ movsd(out, Address(obj, offset));
+ __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
break;
}
case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << instruction->GetType();
+ LOG(FATAL) << "Unreachable type " << field_type;
UNREACHABLE();
}
+
+ // Longs are handled in the switch.
+ if (field_type != Primitive::kPrimLong) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Register card = locations->GetTemp(1).AsRegister<Register>();
+ codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>());
+ }
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+}
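The mirror-image sketch for the volatile-long store (placeholder names again): two movd copies move the pair halves into separate XMM registers, punpckldq interleaves their low dwords, and a single movsd writes all 64 bits at once:

    __ movd(xmm_t1, value_lo);                     // bits 31..0
    __ movd(xmm_t2, value_hi);                     // bits 63..32
    __ punpckldq(xmm_t1, xmm_t2);                  // xmm_t1[63..0] = value_hi:value_lo
    __ movsd(Address(base, offset), xmm_t1);       // single 8-byte store => atomic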
+
+void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
}
void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::Any());
+ Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
+ ? Location::RequiresRegister()
+ : Location::Any();
+ locations->SetInAt(0, loc);
if (instruction->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
}
-void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
+void InstructionCodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
+ if (codegen_->CanMoveNullCheckToUser(instruction)) {
+ return;
+ }
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj = locations->InAt(0);
+
+ __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
+ codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+}
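The testl is a dummy read through the object pointer: if obj is null the load faults, and the runtime's fault handler uses the PC recorded here to raise the NullPointerException at the right dex_pc (this relies on ART's implicit-check signal machinery, which is outside this diff). In effect:

    // __ testl(EAX, Address(obj, 0));  // reads [obj + 0]; faults iff obj == null
    // codegen_->RecordPcInfo(...);     // maps the faulting PC back to the dex_pc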
+
+void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction);
codegen_->AddSlowPath(slow_path);
@@ -2851,6 +3039,14 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
__ j(kEqual, slow_path->GetEntryLabel());
}
+void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
+ if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+ GenerateImplicitNullCheck(instruction);
+ } else {
+ GenerateExplicitNullCheck(instruction);
+ }
+}
+
void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -2864,7 +3060,8 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
Register obj = locations->InAt(0).AsRegister<Register>();
Location index = locations->InAt(1);
- switch (instruction->GetType()) {
+ Primitive::Type type = instruction->GetType();
+ switch (type) {
case Primitive::kPrimBoolean: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
Register out = locations->Out().AsRegister<Register>();
@@ -2932,10 +3129,12 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
__ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
__ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
} else {
__ movl(out.AsRegisterPairLow<Register>(),
Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
__ movl(out.AsRegisterPairHigh<Register>(),
Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize));
}
@@ -2944,12 +3143,16 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
+ LOG(FATAL) << "Unimplemented register type " << type;
UNREACHABLE();
case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << instruction->GetType();
+ LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
}
+
+ if (type != Primitive::kPrimLong) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
}
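For register pairs, MaybeRecordImplicitNullCheck is placed after the first movl only: a null base faults on the first access, so that is the instruction whose PC must map to the check. A sketch (out_lo/out_hi are placeholders):

    // __ movl(out_lo, Address(obj, offset));                 // faults here if obj == null
    // codegen_->MaybeRecordImplicitNullCheck(instruction);   // must follow the first access
    // __ movl(out_hi, Address(obj, offset + kX86WordSize));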
void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
@@ -3026,6 +3229,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
}
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -3049,6 +3253,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
}
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -3077,6 +3282,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
}
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
if (needs_write_barrier) {
Register temp = locations->GetTemp(0).AsRegister<Register>();
@@ -3098,17 +3304,20 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
if (value.IsRegisterPair()) {
__ movl(Address(obj, offset), value.AsRegisterPairLow<Register>());
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
__ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh<Register>());
} else {
DCHECK(value.IsConstant());
int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
__ movl(Address(obj, offset), Immediate(Low32Bits(val)));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
__ movl(Address(obj, offset + kX86WordSize), Immediate(High32Bits(val)));
}
} else {
if (value.IsRegisterPair()) {
__ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
value.AsRegisterPairLow<Register>());
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
__ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
value.AsRegisterPairHigh<Register>());
} else {
@@ -3116,6 +3325,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
__ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
Immediate(Low32Bits(val)));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
__ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
Immediate(High32Bits(val)));
}
@@ -3146,6 +3356,7 @@ void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
Register obj = locations->InAt(0).AsRegister<Register>();
Register out = locations->Out().AsRegister<Register>();
__ movl(out, Address(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -3264,7 +3475,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ movl(Address(ESP, destination.GetStackIndex()), imm);
}
} else {
- LOG(FATAL) << "Unimplemented";
+ LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
}
}
@@ -3383,159 +3594,6 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
// No need for memory fence, thanks to the X86 memory model.
}
-void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- Register cls = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = instruction->GetFieldOffset().Uint32Value();
-
- switch (instruction->GetType()) {
- case Primitive::kPrimBoolean: {
- Register out = locations->Out().AsRegister<Register>();
- __ movzxb(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimByte: {
- Register out = locations->Out().AsRegister<Register>();
- __ movsxb(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimShort: {
- Register out = locations->Out().AsRegister<Register>();
- __ movsxw(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimChar: {
- Register out = locations->Out().AsRegister<Register>();
- __ movzxw(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- Register out = locations->Out().AsRegister<Register>();
- __ movl(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimLong: {
- // TODO: support volatile.
- __ movl(locations->Out().AsRegisterPairLow<Register>(), Address(cls, offset));
- __ movl(locations->Out().AsRegisterPairHigh<Register>(), Address(cls, kX86WordSize + offset));
- break;
- }
-
- case Primitive::kPrimFloat: {
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
- __ movss(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimDouble: {
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
- __ movsd(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << instruction->GetType();
- UNREACHABLE();
- }
-}
-
-void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- Primitive::Type field_type = instruction->GetFieldType();
- bool needs_write_barrier =
- CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
- bool is_byte_type = (field_type == Primitive::kPrimBoolean)
- || (field_type == Primitive::kPrimByte);
- // The register allocator does not support multiple
- // inputs that die at entry with one in a specific register.
- if (is_byte_type) {
- // Ensure the value is in a byte register.
- locations->SetInAt(1, Location::RegisterLocation(EAX));
- } else {
- locations->SetInAt(1, Location::RequiresRegister());
- }
- // Temporary registers for the write barrier.
- if (needs_write_barrier) {
- locations->AddTemp(Location::RequiresRegister());
- // Ensure the card is in a byte register.
- locations->AddTemp(Location::RegisterLocation(ECX));
- }
-}
-
-void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- Register cls = locations->InAt(0).AsRegister<Register>();
- uint32_t offset = instruction->GetFieldOffset().Uint32Value();
- Primitive::Type field_type = instruction->GetFieldType();
-
- switch (field_type) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte: {
- ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>();
- __ movb(Address(cls, offset), value);
- break;
- }
-
- case Primitive::kPrimShort:
- case Primitive::kPrimChar: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ movw(Address(cls, offset), value);
- break;
- }
-
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- Register value = locations->InAt(1).AsRegister<Register>();
- __ movl(Address(cls, offset), value);
-
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
- Register temp = locations->GetTemp(0).AsRegister<Register>();
- Register card = locations->GetTemp(1).AsRegister<Register>();
- codegen_->MarkGCCard(temp, card, cls, value);
- }
- break;
- }
-
- case Primitive::kPrimLong: {
- Location value = locations->InAt(1);
- __ movl(Address(cls, offset), value.AsRegisterPairLow<Register>());
- __ movl(Address(cls, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
- break;
- }
-
- case Primitive::kPrimFloat: {
- XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
- __ movss(Address(cls, offset), value);
- break;
- }
-
- case Primitive::kPrimDouble: {
- XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
- __ movsd(Address(cls, offset), value);
- break;
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << field_type;
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index aed06c04df..a9086f8876 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -18,6 +18,8 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#include "code_generator.h"
+#include "dex/compiler_enums.h"
+#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86/assembler_x86.h"
@@ -105,6 +107,8 @@ class LocationsBuilderX86 : public HGraphVisitor {
void HandleBitwiseOperation(HBinaryOperation* instruction);
void HandleInvoke(HInvoke* invoke);
void HandleShift(HBinaryOperation* instruction);
+ void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
CodeGeneratorX86* const codegen_;
InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -133,10 +137,19 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
void HandleBitwiseOperation(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
+ void GenerateRemFP(HRem* rem);
void HandleShift(HBinaryOperation* instruction);
void GenerateShlLong(const Location& loc, Register shifter);
void GenerateShrLong(const Location& loc, Register shifter);
void GenerateUShrLong(const Location& loc, Register shifter);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+ void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void PushOntoFPStack(Location source, uint32_t temp_offset,
+ uint32_t stack_adjustment, bool is_float);
+
+ void GenerateImplicitNullCheck(HNullCheck* instruction);
+ void GenerateExplicitNullCheck(HNullCheck* instruction);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -146,7 +159,7 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
class CodeGeneratorX86 : public CodeGenerator {
public:
- explicit CodeGeneratorX86(HGraph* graph);
+ CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options);
virtual ~CodeGeneratorX86() {}
void GenerateFrameEntry() OVERRIDE;
@@ -160,6 +173,11 @@ class CodeGeneratorX86 : public CodeGenerator {
return kX86WordSize;
}
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ // 8 bytes == 2 words for each spill.
+ return 2 * kX86WordSize;
+ }
+
size_t FrameEntrySpillSize() const OVERRIDE;
HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -216,6 +234,10 @@ class CodeGeneratorX86 : public CodeGenerator {
block_labels_.SetSize(GetGraph()->GetBlocks().Size());
}
+ bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
+ return type == Primitive::kPrimLong;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ff7fcdcbac..350392fbf4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -18,6 +18,8 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "intrinsics_x86_64.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/class.h"
@@ -32,8 +34,6 @@ namespace art {
namespace x86_64 {
-static constexpr bool kExplicitStackOverflowCheck = false;
-
// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;
@@ -47,6 +47,8 @@ static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
+static constexpr int kC2ConditionMask = 0x400;
+
class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
public:
InvokeRuntimeCallingConvention()
@@ -61,20 +63,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatR
#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
-class SlowPathCodeX86_64 : public SlowPathCode {
- public:
- SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
-
- Label* GetEntryLabel() { return &entry_label_; }
- Label* GetExitLabel() { return &exit_label_; }
-
- private:
- Label entry_label_;
- Label exit_label_;
-
- DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
-};
-
class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
public:
explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {}
@@ -285,8 +273,8 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 {
codegen->SaveLiveRegisters(locations);
InvokeRuntimeCallingConvention calling_convention;
- x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(0)));
- __ movl(CpuRegister(calling_convention.GetRegisterAt(1)),
+ x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
+ __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
Immediate(instruction_->GetStringIndex()));
__ gs()->call(Address::Absolute(
QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pResolveString), true));
@@ -375,6 +363,31 @@ inline Condition X86_64Condition(IfCondition cond) {
return kEqual;
}
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ CpuRegister temp) {
+ // All registers are assumed to be correctly set up.
+
+ // TODO: Implement all kinds of calls:
+ // 1) boot -> boot
+ // 2) app -> boot
+ // 3) app -> app
+ //
+ // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+ // temp = method;
+ LoadCurrentMethod(temp);
+ // temp = temp->dex_cache_resolved_methods_;
+ __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+ // temp = temp[index_in_cache]
+ __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+ // (temp + offset_of_quick_compiled_code)()
+ __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kX86_64WordSize).SizeValue()));
+
+ DCHECK(!IsLeafMethod());
+ RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
}
@@ -403,8 +416,8 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin
return kX86_64WordSize;
}
-CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
- : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, 0),
+CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
+ : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, 0, compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
@@ -474,8 +487,9 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
+ bool implicit_stack_overflow_checks = GetCompilerOptions().GetImplicitStackOverflowChecks();
- if (!skip_overflow_check && !kExplicitStackOverflowCheck) {
+ if (!skip_overflow_check && implicit_stack_overflow_checks) {
__ testq(CpuRegister(RAX), Address(
CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
RecordPcInfo(nullptr, 0);
@@ -485,7 +499,7 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
__ subq(CpuRegister(RSP),
Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
- if (!skip_overflow_check && kExplicitStackOverflowCheck) {
+ if (!skip_overflow_check && !implicit_stack_overflow_checks) {
SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
AddSlowPath(slow_path);
@@ -571,8 +585,18 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
} else if (source.IsFpuRegister()) {
__ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
+ } else if (source.IsConstant()) {
+ HConstant* constant = source.GetConstant();
+ int32_t value;
+ if (constant->IsFloatConstant()) {
+ value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+ } else {
+ DCHECK(constant->IsIntConstant());
+ value = constant->AsIntConstant()->GetValue();
+ }
+ __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
} else {
- DCHECK(source.IsStackSlot());
+ DCHECK(source.IsStackSlot()) << source;
__ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
@@ -584,6 +608,17 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
} else if (source.IsFpuRegister()) {
__ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
+ } else if (source.IsConstant()) {
+ HConstant* constant = source.GetConstant();
+ int64_t value;  // Assigned in both branches below.
+ if (constant->IsDoubleConstant()) {
+ value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+ } else {
+ DCHECK(constant->IsLongConstant());
+ value = constant->AsLongConstant()->GetValue();
+ }
+ __ movq(CpuRegister(TMP), Immediate(value));
+ __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
} else {
DCHECK(source.IsDoubleStackSlot());
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -1123,30 +1158,31 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
-void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
- // TODO: Implement all kinds of calls:
- // 1) boot -> boot
- // 2) app -> boot
- // 3) app -> app
- //
- // Currently we implement the app -> app logic, which looks up in the resolve cache.
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ if (invoke->GetLocations()->Intrinsified()) {
+ IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
+ intrinsic.Dispatch(invoke);
+ return true;
+ }
+ return false;
+}
- // temp = method;
- codegen_->LoadCurrentMethod(temp);
- // temp = temp->dex_cache_resolved_methods_;
- __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
- // temp = temp[index_in_cache]
- __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
- // (temp + offset_of_quick_compiled_code)()
- __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kX86_64WordSize).SizeValue()));
+void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
+ }
- DCHECK(!codegen_->IsLeafMethod());
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ codegen_->GenerateStaticOrDirectCall(
+ invoke,
+ invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>());
}
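The intrinsic dispatch protocol in this change, as a sketch: the locations builder lets the intrinsic recognizer claim the invoke (marking its LocationSummary as intrinsified), and codegen checks that mark before falling back to the regular call path:

    // Locations phase:
    //   if (intrinsic.TryDispatch(invoke)) return;              // sets Intrinsified()
    // Codegen phase:
    //   if (TryGenerateIntrinsicCode(invoke, codegen_)) return;
    //   codegen_->GenerateStaticOrDirectCall(invoke, temp);     // regular call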
void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
@@ -1182,10 +1218,19 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
}
void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
+ }
+
CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
@@ -1199,6 +1244,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke)
} else {
__ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
}
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetMethodAt(method_offset);
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
@@ -1235,6 +1281,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo
} else {
__ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
}
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetImtEntryAt(method_offset);
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
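Note the pattern shared by the virtual and interface paths: the class-pointer load from the receiver doubles as the implicit null check, so MaybeRecordImplicitNullCheck is called immediately after that movl rather than emitting a separate test:

    // __ movl(temp, Address(receiver, class_offset));  // faults if receiver == null
    // codegen_->MaybeRecordImplicitNullCheck(invoke);  // record the PC of that load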
@@ -1978,6 +2025,81 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
}
}
+void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
+ uint32_t stack_adjustment, bool is_float) {
+ if (source.IsStackSlot()) {
+ DCHECK(is_float);
+ __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
+ } else if (source.IsDoubleStackSlot()) {
+ DCHECK(!is_float);
+ __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
+ } else {
+ // Write the value to the temporary location on the stack and load to FP stack.
+ if (is_float) {
+ Location stack_temp = Location::StackSlot(temp_offset);
+ codegen_->Move(stack_temp, source);
+ __ flds(Address(CpuRegister(RSP), temp_offset));
+ } else {
+ Location stack_temp = Location::DoubleStackSlot(temp_offset);
+ codegen_->Move(stack_temp, source);
+ __ fldl(Address(CpuRegister(RSP), temp_offset));
+ }
+ }
+}
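Usage sketch (from GenerateRemFP below): the operands are pushed in reverse order so that when fprem runs, st0 holds the dividend and st1 the divisor:

    // PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);  // divisor, ends up in st1
    // PushOntoFPStack(first, 0, 2 * elem_size, is_float);           // dividend, ends up in st0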
+
+void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem* rem) {
+ Primitive::Type type = rem->GetResultType();
+ bool is_float = type == Primitive::kPrimFloat;
+ size_t elem_size = Primitive::ComponentSize(type);
+ LocationSummary* locations = rem->GetLocations();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+ Location out = locations->Out();
+
+ // Create stack space for 2 elements.
+ // TODO: enhance register allocator to ask for stack temporaries.
+ __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
+
+ // Load the values to the FP stack in reverse order, using temporaries if needed.
+ PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
+ PushOntoFPStack(first, 0, 2 * elem_size, is_float);
+
+ // Loop doing FPREM until we stabilize.
+ Label retry;
+ __ Bind(&retry);
+ __ fprem();
+
+ // Move FP status to AX.
+ __ fstsw();
+
+ // And see if the argument reduction is complete. This is signaled by the
+ // C2 FPU flag bit set to 0.
+ __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
+ __ j(kNotEqual, &retry);
+
+ // We have settled on the final value. Retrieve it into an XMM register.
+ // Store FP top of stack to real stack.
+ if (is_float) {
+ __ fsts(Address(CpuRegister(RSP), 0));
+ } else {
+ __ fstl(Address(CpuRegister(RSP), 0));
+ }
+
+ // Pop the 2 items from the FP stack.
+ __ fucompp();
+
+ // Load the value from the stack into an XMM register.
+ DCHECK(out.IsFpuRegister()) << out;
+ if (is_float) {
+ __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
+ } else {
+ __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
+ }
+
+ // And remove the temporary stack space we allocated.
+ __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
+}
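Why the retry loop terminates: fstsw stores the x87 status word into AX, and kC2ConditionMask (0x400) isolates bit 10, the C2 flag, which fprem leaves set while the partial remainder still needs further reduction. Logically the loop is:

    // retry:
    //   fprem                  // st0 = partial remainder of st0 / st1
    //   fstsw                  // AX = FPU status word
    //   andl EAX, 0x400        // keep only C2
    //   jne retry              // C2 set => reduction not finished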
+
void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
Primitive::Type type = instruction->GetResultType();
@@ -2077,11 +2199,8 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
void LocationsBuilderX86_64::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
- LocationSummary::CallKind call_kind =
- (type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)
- ? LocationSummary::kNoCall
- : LocationSummary::kCall;
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
switch (type) {
case Primitive::kPrimInt:
@@ -2095,11 +2214,10 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
- locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
- // The runtime helper puts the result in XMM0.
- locations->SetOut(Location::FpuRegisterLocation(XMM0));
+ locations->SetInAt(0, Location::Any());
+ locations->SetInAt(1, Location::Any());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RegisterLocation(RAX));
break;
}
@@ -2116,14 +2234,9 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
GenerateDivRemIntegral(rem);
break;
}
- case Primitive::kPrimFloat: {
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmodf), true));
- codegen_->RecordPcInfo(rem, rem->GetDexPc());
- break;
- }
+ case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
- __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmod), true));
- codegen_->RecordPcInfo(rem, rem->GetDexPc());
+ GenerateRemFP(rem);
break;
}
default:
@@ -2311,14 +2424,14 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
InvokeRuntimeCallingConvention calling_convention;
locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
locations->SetOut(Location::RegisterLocation(RAX));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
}
void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
InvokeRuntimeCallingConvention calling_convention;
- codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
+ codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2)));
__ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
__ gs()->call(Address::Absolute(
@@ -2389,69 +2502,87 @@ void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) {
LOG(FATAL) << "Unimplemented";
}
-void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
+ /*
+ * According to the JSR-133 Cookbook, on x86 only StoreLoad/AnyAny barriers need a memory fence.
+ * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+ * For those cases, all we need to ensure is that there is a scheduling barrier in place.
+ */
+ switch (kind) {
+ case MemBarrierKind::kAnyAny: {
+ __ mfence();
+ break;
+ }
+ case MemBarrierKind::kAnyStore:
+ case MemBarrierKind::kLoadAny:
+ case MemBarrierKind::kStoreStore: {
+ // nop
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected memory barier " << kind;
+ }
+}
+
+void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- Primitive::Type field_type = instruction->GetFieldType();
- bool needs_write_barrier =
- CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue());
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- if (needs_write_barrier) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- }
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
-void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
LocationSummary* locations = instruction->GetLocations();
- CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
- size_t offset = instruction->GetFieldOffset().SizeValue();
- Primitive::Type field_type = instruction->GetFieldType();
+ CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
+ Location out = locations->Out();
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
switch (field_type) {
- case Primitive::kPrimBoolean:
+ case Primitive::kPrimBoolean: {
+ __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
+ break;
+ }
+
case Primitive::kPrimByte: {
- CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
- __ movb(Address(obj, offset), value);
+ __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
+ break;
+ }
+
+ case Primitive::kPrimShort: {
+ __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
break;
}
- case Primitive::kPrimShort:
case Primitive::kPrimChar: {
- CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
- __ movw(Address(obj, offset), value);
+ __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
break;
}
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
- __ movl(Address(obj, offset), value);
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
- CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
- codegen_->MarkGCCard(temp, card, obj, value);
- }
+ __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
break;
}
case Primitive::kPrimLong: {
- CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
- __ movq(Address(obj, offset), value);
+ __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
break;
}
case Primitive::kPrimFloat: {
- XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
- __ movss(Address(obj, offset), value);
+ __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
break;
}
case Primitive::kPrimDouble: {
- XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
- __ movsd(Address(obj, offset), value);
+ __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
break;
}
@@ -2459,86 +2590,155 @@ void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* in
LOG(FATAL) << "Unreachable type " << field_type;
UNREACHABLE();
}
+
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
}
-void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(field_info.GetFieldType(), instruction->InputAt(1));
+
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ locations->SetInAt(1, Location::RequiresRegister());
+ if (needs_write_barrier) {
+ // Temporary registers for the write barrier.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
-void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
LocationSummary* locations = instruction->GetLocations();
- CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
- size_t offset = instruction->GetFieldOffset().SizeValue();
+ CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
+ Location value = locations->InAt(1);
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- switch (instruction->GetType()) {
- case Primitive::kPrimBoolean: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movzxb(out, Address(obj, offset));
- break;
- }
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ }
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movsxb(out, Address(obj, offset));
- break;
- }
-
- case Primitive::kPrimShort: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movsxw(out, Address(obj, offset));
+ __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
break;
}
+ case Primitive::kPrimShort:
case Primitive::kPrimChar: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movzxw(out, Address(obj, offset));
+ __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
break;
}
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movl(out, Address(obj, offset));
+ __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
break;
}
case Primitive::kPrimLong: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movq(out, Address(obj, offset));
+ __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
break;
}
case Primitive::kPrimFloat: {
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
- __ movss(out, Address(obj, offset));
+ __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
break;
}
case Primitive::kPrimDouble: {
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
- __ movsd(out, Address(obj, offset));
+ __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
break;
}
case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << instruction->GetType();
+ LOG(FATAL) << "Unreachable type " << field_type;
UNREACHABLE();
}
+
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
+ codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>());
+ }
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+}
+
+void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction, instruction->GetFieldInfo());
}
void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::Any());
+ Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
+ ? Location::RequiresRegister()
+ : Location::Any();
+ locations->SetInAt(0, loc);
if (instruction->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
}
-void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
+void InstructionCodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
+ if (codegen_->CanMoveNullCheckToUser(instruction)) {
+ return;
+ }
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj = locations->InAt(0);
+
+ __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
+ codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
codegen_->AddSlowPath(slow_path);
@@ -2558,6 +2758,14 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
__ j(kEqual, slow_path->GetEntryLabel());
}
+void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
+ if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+ GenerateImplicitNullCheck(instruction);
+ } else {
+ GenerateExplicitNullCheck(instruction);
+ }
+}
+
void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -2675,6 +2883,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
@@ -2743,6 +2952,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
}
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -2769,6 +2979,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
}
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -2797,7 +3008,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
}
}
-
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
if (needs_write_barrier) {
DCHECK_EQ(value_type, Primitive::kPrimNot);
CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
@@ -2825,6 +3036,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
value.AsRegister<CpuRegister>());
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -2839,6 +3051,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ movss(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
value.AsFpuRegister<XmmRegister>());
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -2853,6 +3066,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ movsd(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
value.AsFpuRegister<XmmRegister>());
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -2875,6 +3089,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction)
CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
__ movl(out, Address(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -3222,146 +3437,6 @@ void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
}
-void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>();
- size_t offset = instruction->GetFieldOffset().SizeValue();
-
- switch (instruction->GetType()) {
- case Primitive::kPrimBoolean: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movzxb(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimByte: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movsxb(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimShort: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movsxw(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimChar: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movzxw(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movl(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimLong: {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ movq(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimFloat: {
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
- __ movss(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimDouble: {
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
- __ movsd(out, Address(cls, offset));
- break;
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << instruction->GetType();
- UNREACHABLE();
- }
-}
-
-void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
- Primitive::Type field_type = instruction->GetFieldType();
- bool needs_write_barrier =
- CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue());
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- if (needs_write_barrier) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- }
-}
-
-void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>();
- size_t offset = instruction->GetFieldOffset().SizeValue();
- Primitive::Type field_type = instruction->GetFieldType();
-
- switch (field_type) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte: {
- CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
- __ movb(Address(cls, offset), value);
- break;
- }
-
- case Primitive::kPrimShort:
- case Primitive::kPrimChar: {
- CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
- __ movw(Address(cls, offset), value);
- break;
- }
-
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
- __ movl(Address(cls, offset), value);
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
- CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
- codegen_->MarkGCCard(temp, card, cls, value);
- }
- break;
- }
-
- case Primitive::kPrimLong: {
- CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
- __ movq(Address(cls, offset), value);
- break;
- }
-
- case Primitive::kPrimFloat: {
- XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
- __ movss(Address(cls, offset), value);
- break;
- }
-
- case Primitive::kPrimDouble: {
- XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
- __ movsd(Address(cls, offset), value);
- break;
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << field_type;
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 794b81ffbc..ead771a1f2 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -18,6 +18,8 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#include "code_generator.h"
+#include "dex/compiler_enums.h"
+#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"
@@ -35,6 +37,8 @@ static constexpr FloatRegister kParameterFloatRegisters[] =
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
+static constexpr bool kCoalescedImplicitNullCheck = false;
+
class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
public:
InvokeDexCallingConvention() : CallingConvention(
@@ -66,7 +70,20 @@ class InvokeDexCallingConventionVisitor {
};
class CodeGeneratorX86_64;
-class SlowPathCodeX86_64;
+
+class SlowPathCodeX86_64 : public SlowPathCode {
+ public:
+ SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
+
+ Label* GetEntryLabel() { return &entry_label_; }
+ Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+ Label entry_label_;
+ Label exit_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
+};
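Concrete slow paths derive from SlowPathCodeX86_64 and use the two labels as the
targets for jumping into and back out of the out-of-line code. The usual shape,
as a sketch (see IntrinsicSlowPathX86_64 in intrinsics_x86_64.cc below for a
complete instance):

    // Inside EmitNativeCode(): bind the entry, do the work, jump back inline.
    __ Bind(GetEntryLabel());
    // ... save live registers, call the runtime or managed code, restore ...
    __ jmp(GetExitLabel());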
class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
public:
@@ -109,6 +126,8 @@ class LocationsBuilderX86_64 : public HGraphVisitor {
void HandleInvoke(HInvoke* invoke);
void HandleBitwiseOperation(HBinaryOperation* operation);
void HandleShift(HBinaryOperation* operation);
+ void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGet(HInstruction* instruction);
CodeGeneratorX86_64* const codegen_;
InvokeDexCallingConventionVisitor parameter_visitor_;
@@ -136,8 +155,16 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
void HandleBitwiseOperation(HBinaryOperation* operation);
+ void GenerateRemFP(HRem* rem);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleShift(HBinaryOperation* operation);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+ void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateImplicitNullCheck(HNullCheck* instruction);
+ void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void PushOntoFPStack(Location source, uint32_t temp_offset,
+ uint32_t stack_adjustment, bool is_float);
X86_64Assembler* const assembler_;
CodeGeneratorX86_64* const codegen_;
@@ -147,7 +174,7 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
class CodeGeneratorX86_64 : public CodeGenerator {
public:
- explicit CodeGeneratorX86_64(HGraph* graph);
+ CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options);
virtual ~CodeGeneratorX86_64() {}
void GenerateFrameEntry() OVERRIDE;
@@ -163,6 +190,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
return kX86_64WordSize;
}
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ return kX86_64WordSize;
+ }
+
size_t FrameEntrySpillSize() const OVERRIDE;
HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -212,6 +243,12 @@ class CodeGeneratorX86_64 : public CodeGenerator {
block_labels_.SetSize(GetGraph()->GetBlocks().Size());
}
+ bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ return false;
+ }
+
+ void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 8b75cc7c65..aa4fc8f611 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -17,6 +17,7 @@
#include <functional>
#include "arch/instruction_set.h"
+#include "arch/arm/instruction_set_features_arm.h"
#include "base/macros.h"
#include "builder.h"
#include "code_generator_arm.h"
@@ -26,6 +27,7 @@
#include "common_compiler_test.h"
#include "dex_file.h"
#include "dex_instruction.h"
+#include "driver/compiler_options.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
@@ -79,7 +81,8 @@ template <typename Expected>
static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
InternalCodeAllocator allocator;
- x86::CodeGeneratorX86 codegenX86(graph);
+ CompilerOptions compiler_options;
+ x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
// We avoid doing a stack overflow check that requires the runtime being setup,
// by making sure the compiler knows the methods we are running are leaf methods.
codegenX86.CompileBaseline(&allocator, true);
@@ -87,19 +90,21 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
Run(allocator, codegenX86, has_result, expected);
}
- arm::CodeGeneratorARM codegenARM(graph);
+ std::unique_ptr<const ArmInstructionSetFeatures> features(
+ ArmInstructionSetFeatures::FromCppDefines());
+ arm::CodeGeneratorARM codegenARM(graph, *features.get(), compiler_options);
codegenARM.CompileBaseline(&allocator, true);
if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
Run(allocator, codegenARM, has_result, expected);
}
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph);
+ x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options);
codegenX86_64.CompileBaseline(&allocator, true);
if (kRuntimeISA == kX86_64) {
Run(allocator, codegenX86_64, has_result, expected);
}
- arm64::CodeGeneratorARM64 codegenARM64(graph);
+ arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
codegenARM64.CompileBaseline(&allocator, true);
if (kRuntimeISA == kArm64) {
Run(allocator, codegenARM64, has_result, expected);
@@ -129,17 +134,20 @@ static void RunCodeOptimized(HGraph* graph,
std::function<void(HGraph*)> hook_before_codegen,
bool has_result,
Expected expected) {
+ CompilerOptions compiler_options;
if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
- arm::CodeGeneratorARM codegenARM(graph);
+ arm::CodeGeneratorARM codegenARM(graph,
+ *ArmInstructionSetFeatures::FromCppDefines(),
+ compiler_options);
RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kArm64) {
- arm64::CodeGeneratorARM64 codegenARM64(graph);
+ arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86) {
- x86::CodeGeneratorX86 codegenX86(graph);
+ x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86_64) {
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph);
+ x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options);
RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
}
}
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index cad6683577..6ceccfbf0e 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -19,6 +19,7 @@
#include "code_generator_x86.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
+#include "driver/compiler_options.h"
#include "graph_checker.h"
#include "optimizing_unit_test.h"
#include "pretty_printer.h"
@@ -45,11 +46,11 @@ static void TestCode(const uint16_t* data,
std::string actual_before = printer_before.str();
ASSERT_EQ(expected_before, actual_before);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
HConstantFolding(graph).Run();
- SSAChecker ssa_checker(&allocator, graph);
- ssa_checker.Run();
- ASSERT_TRUE(ssa_checker.IsValid());
+ SSAChecker ssa_checker_cf(&allocator, graph);
+ ssa_checker_cf.Run();
+ ASSERT_TRUE(ssa_checker_cf.IsValid());
StringPrettyPrinter printer_after_cf(graph);
printer_after_cf.VisitInsertionOrder();
@@ -59,8 +60,9 @@ static void TestCode(const uint16_t* data,
check_after_cf(graph);
HDeadCodeElimination(graph).Run();
- ssa_checker.Run();
- ASSERT_TRUE(ssa_checker.IsValid());
+ SSAChecker ssa_checker_dce(&allocator, graph);
+ ssa_checker_dce.Run();
+ ASSERT_TRUE(ssa_checker_dce.IsValid());
StringPrettyPrinter printer_after_dce(graph);
printer_after_dce.VisitInsertionOrder();
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 3dbd04e250..a644719622 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86.h"
#include "dead_code_elimination.h"
+#include "driver/compiler_options.h"
#include "graph_checker.h"
#include "optimizing_unit_test.h"
#include "pretty_printer.h"
@@ -39,7 +40,7 @@ static void TestCode(const uint16_t* data,
std::string actual_before = printer_before.str();
ASSERT_EQ(actual_before, expected_before);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
HDeadCodeElimination(graph).Run();
SSAChecker ssa_checker(&allocator, graph);
ssa_checker.Run();
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 5d712feb2b..291b14cb52 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -16,9 +16,9 @@
#include "graph_checker.h"
-#include <string>
#include <map>
#include <sstream>
+#include <string>
#include "base/bit_vector-inl.h"
@@ -123,6 +123,14 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
}
void GraphChecker::VisitInstruction(HInstruction* instruction) {
+ if (seen_ids_.IsBitSet(instruction->GetId())) {
+ std::stringstream error;
+ error << "Duplicate id in graph " << instruction->GetId() << ".";
+ errors_.push_back(error.str());
+ } else {
+ seen_ids_.SetBit(instruction->GetId());
+ }
+
// Ensure `instruction` is associated with `current_block_`.
if (instruction->GetBlock() != current_block_) {
std::stringstream error;
@@ -355,9 +363,29 @@ static Primitive::Type PrimitiveKind(Primitive::Type type) {
}
}
+void SSAChecker::VisitIf(HIf* instruction) {
+ VisitInstruction(instruction);
+ HInstruction* input = instruction->InputAt(0);
+ if (input->IsIntConstant()) {
+ int value = input->AsIntConstant()->GetValue();
+ if (value != 0 && value != 1) {
+ std::stringstream error;
+ error << "If instruction " << instruction->GetId()
+ << " has a non-boolean constant input whose value is: "
+ << value << ".";
+ errors_.push_back(error.str());
+ }
+ } else if (input->GetType() != Primitive::kPrimBoolean) {
+ std::stringstream error;
+ error << "If instruction " << instruction->GetId()
+ << " has a non-boolean input type: "
+ << input->GetType() << ".";
+ errors_.push_back(error.str());
+ }
+}
+
void SSAChecker::VisitCondition(HCondition* op) {
VisitInstruction(op);
- // TODO: check inputs types, and special case the `null` check.
if (op->GetType() != Primitive::kPrimBoolean) {
std::stringstream error;
error << "Condition " << op->DebugName() << " " << op->GetId()
@@ -365,6 +393,46 @@ void SSAChecker::VisitCondition(HCondition* op) {
<< op->GetType() << ".";
errors_.push_back(error.str());
}
+ HInstruction* lhs = op->InputAt(0);
+ HInstruction* rhs = op->InputAt(1);
+ if (lhs->GetType() == Primitive::kPrimNot) {
+ if (!op->IsEqual() && !op->IsNotEqual()) {
+ std::stringstream error;
+ error << "Condition " << op->DebugName() << " " << op->GetId()
+ << " uses an object as left-hand side input.";
+ errors_.push_back(error.str());
+ }
+ if (rhs->IsIntConstant() && rhs->AsIntConstant()->GetValue() != 0) {
+ std::stringstream error;
+ error << "Condition " << op->DebugName() << " " << op->GetId()
+ << " compares an object with a non-0 integer: "
+ << rhs->AsIntConstant()->GetValue()
+ << ".";
+ errors_.push_back(error.str());
+ }
+ } else if (rhs->GetType() == Primitive::kPrimNot) {
+ if (!op->IsEqual() && !op->IsNotEqual()) {
+ std::stringstream error;
+ error << "Condition " << op->DebugName() << " " << op->GetId()
+ << " uses an object as right-hand side input.";
+ errors_.push_back(error.str());
+ }
+ if (lhs->IsIntConstant() && lhs->AsIntConstant()->GetValue() != 0) {
+ std::stringstream error;
+ error << "Condition " << op->DebugName() << " " << op->GetId()
+ << " compares a non-0 integer with an object: "
+ << lhs->AsIntConstant()->GetValue()
+ << ".";
+ errors_.push_back(error.str());
+ }
+ } else if (PrimitiveKind(lhs->GetType()) != PrimitiveKind(rhs->GetType())) {
+ std::stringstream error;
+ error << "Condition " << op->DebugName() << " " << op->GetId()
+ << " has inputs of different type: "
+ << lhs->GetType() << ", and " << rhs->GetType()
+ << ".";
+ errors_.push_back(error.str());
+ }
}
void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) {
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index b6c9f1720c..ae1557b57c 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -30,7 +30,8 @@ class GraphChecker : public HGraphDelegateVisitor {
const char* dump_prefix = "art::GraphChecker: ")
: HGraphDelegateVisitor(graph),
allocator_(allocator),
- dump_prefix_(dump_prefix) {}
+ dump_prefix_(dump_prefix),
+ seen_ids_(allocator, graph->GetCurrentInstructionId(), false) {}
// Check the whole graph (in insertion order).
virtual void Run() { VisitInsertionOrder(); }
@@ -68,6 +69,7 @@ class GraphChecker : public HGraphDelegateVisitor {
private:
// String displayed before dumped errors.
const char* const dump_prefix_;
+ ArenaBitVector seen_ids_;
DISALLOW_COPY_AND_ASSIGN(GraphChecker);
};
@@ -99,6 +101,7 @@ class SSAChecker : public GraphChecker {
void VisitPhi(HPhi* phi) OVERRIDE;
void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
void VisitCondition(HCondition* op) OVERRIDE;
+ void VisitIf(HIf* instruction) OVERRIDE;
private:
DISALLOW_COPY_AND_ASSIGN(SSAChecker);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 5d1703e237..df21c8e9c3 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -30,10 +30,12 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
HGraphVisualizerPrinter(HGraph* graph,
std::ostream& output,
const char* pass_name,
+ bool is_after_pass,
const CodeGenerator& codegen)
: HGraphVisitor(graph),
output_(output),
pass_name_(pass_name),
+ is_after_pass_(is_after_pass),
codegen_(codegen),
indent_(0) {}
@@ -136,14 +138,21 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
output_ << "invalid";
} else if (location.IsStackSlot()) {
output_ << location.GetStackIndex() << "(sp)";
+ } else if (location.IsFpuRegisterPair()) {
+ codegen_.DumpFloatingPointRegister(output_, location.low());
+ output_ << " and ";
+ codegen_.DumpFloatingPointRegister(output_, location.high());
+ } else if (location.IsRegisterPair()) {
+ codegen_.DumpCoreRegister(output_, location.low());
+ output_ << " and ";
+ codegen_.DumpCoreRegister(output_, location.high());
} else {
DCHECK(location.IsDoubleStackSlot());
output_ << "2x" << location.GetStackIndex() << "(sp)";
}
}
- void VisitParallelMove(HParallelMove* instruction) {
- output_ << instruction->DebugName();
+ void VisitParallelMove(HParallelMove* instruction) OVERRIDE {
output_ << " (";
for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) {
MoveOperands* move = instruction->MoveOperandsAt(i);
@@ -158,8 +167,25 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
}
- void VisitInstruction(HInstruction* instruction) {
+ void VisitIntConstant(HIntConstant* instruction) OVERRIDE {
+ output_ << " " << instruction->GetValue();
+ }
+
+ void VisitLongConstant(HLongConstant* instruction) OVERRIDE {
+ output_ << " " << instruction->GetValue();
+ }
+
+ void VisitFloatConstant(HFloatConstant* instruction) OVERRIDE {
+ output_ << " " << instruction->GetValue();
+ }
+
+ void VisitDoubleConstant(HDoubleConstant* instruction) OVERRIDE {
+ output_ << " " << instruction->GetValue();
+ }
+
+ void PrintInstruction(HInstruction* instruction) {
output_ << instruction->DebugName();
+ instruction->Accept(this);
if (instruction->InputCount() > 0) {
output_ << " [ ";
for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
@@ -201,19 +227,20 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
int bci = 0;
output_ << bci << " " << instruction->NumberOfUses()
<< " " << GetTypeId(instruction->GetType()) << instruction->GetId() << " ";
- instruction->Accept(this);
+ PrintInstruction(instruction);
output_ << kEndInstructionMarker << std::endl;
}
}
void Run() {
StartTag("cfg");
- PrintProperty("name", pass_name_);
+ std::string pass_desc = std::string(pass_name_) + (is_after_pass_ ? " (after)" : " (before)");
+ PrintProperty("name", pass_desc.c_str());
VisitInsertionOrder();
EndTag("cfg");
}
- void VisitBasicBlock(HBasicBlock* block) {
+ void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
StartTag("block");
PrintProperty("name", "B", block->GetBlockId());
if (block->GetLifetimeStart() != kNoLifetime) {
@@ -259,6 +286,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
private:
std::ostream& output_;
const char* pass_name_;
+ const bool is_after_pass_;
const CodeGenerator& codegen_;
size_t indent_;
@@ -270,7 +298,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output,
const char* string_filter,
const CodeGenerator& codegen,
const char* method_name)
- : output_(output), graph_(graph), codegen_(codegen), is_enabled_(false) {
+ : output_(output), graph_(graph), codegen_(codegen), is_enabled_(false) {
if (output == nullptr) {
return;
}
@@ -279,7 +307,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output,
}
is_enabled_ = true;
- HGraphVisualizerPrinter printer(graph, *output_, "", codegen_);
+ HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_);
printer.StartTag("compilation");
printer.PrintProperty("name", method_name);
printer.PrintProperty("method", method_name);
@@ -287,12 +315,11 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output,
printer.EndTag("compilation");
}
-void HGraphVisualizer::DumpGraph(const char* pass_name) const {
- if (!is_enabled_) {
- return;
+void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) const {
+ if (is_enabled_) {
+ HGraphVisualizerPrinter printer(graph_, *output_, pass_name, is_after_pass, codegen_);
+ printer.Run();
}
- HGraphVisualizerPrinter printer(graph_, *output_, pass_name, codegen_);
- printer.Run();
}
} // namespace art
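With the new is_after_pass flag, a pass can be bracketed by two dumps that show
up as separate "(before)" and "(after)" entries in the c1visualizer output. A
minimal usage sketch (the pass name and surrounding objects are illustrative,
not part of this change):

    visualizer.DumpGraph("gvn", false);  // emitted as "gvn (before)"
    GlobalValueNumberer gvn(&allocator, graph);
    gvn.Run();
    visualizer.DumpGraph("gvn");  // is_after_pass defaults to true: "gvn (after)"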
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index b5baed9c99..b90d15e1ff 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -32,28 +32,18 @@ static const char* kLivenessPassName = "liveness";
static const char* kRegisterAllocatorPassName = "register";
/**
- * If enabled, emits compilation information suitable for the c1visualizer tool
- * and IRHydra.
- * Currently only works if the compiler is single threaded.
+ * This class outputs the HGraph in the C1visualizer format.
+ * Note: Currently only works if the compiler is single threaded.
*/
class HGraphVisualizer : public ValueObject {
public:
- /**
- * If output is not null, and the method name of the dex compilation
- * unit contains `string_filter`, the compilation information will be
- * emitted.
- */
HGraphVisualizer(std::ostream* output,
HGraph* graph,
const char* string_filter,
const CodeGenerator& codegen,
const char* method_name);
- /**
- * If this visualizer is enabled, emit the compilation information
- * in `output_`.
- */
- void DumpGraph(const char* pass_name) const;
+ void DumpGraph(const char* pass_name, bool is_after_pass = true) const;
private:
std::ostream* const output_;
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 94ff192264..48f1ea9e15 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -40,18 +40,22 @@ TEST(GVNTest, LocalFieldElimination) {
entry->AddSuccessor(block);
block->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot,
+ MemberOffset(42), false));
block->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot,
+ MemberOffset(42), false));
HInstruction* to_remove = block->GetLastInstruction();
block->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(43)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot,
+ MemberOffset(43), false));
HInstruction* different_offset = block->GetLastInstruction();
// Kill the value.
block->AddInstruction(new (&allocator) HInstanceFieldSet(
- parameter, parameter, Primitive::kPrimNot, MemberOffset(42)));
+ parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false));
block->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimNot,
+ MemberOffset(42), false));
HInstruction* use_after_kill = block->GetLastInstruction();
block->AddInstruction(new (&allocator) HExit());
@@ -82,7 +86,8 @@ TEST(GVNTest, GlobalFieldElimination) {
graph->AddBlock(block);
entry->AddSuccessor(block);
block->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean,
+ MemberOffset(42), false));
block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction()));
HBasicBlock* then = new (&allocator) HBasicBlock(graph);
@@ -98,13 +103,16 @@ TEST(GVNTest, GlobalFieldElimination) {
else_->AddSuccessor(join);
then->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean,
+ MemberOffset(42), false));
then->AddInstruction(new (&allocator) HGoto());
else_->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean,
+ MemberOffset(42), false));
else_->AddInstruction(new (&allocator) HGoto());
join->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean,
+ MemberOffset(42), false));
join->AddInstruction(new (&allocator) HExit());
graph->TryBuildingSsa();
@@ -132,7 +140,8 @@ TEST(GVNTest, LoopFieldElimination) {
graph->AddBlock(block);
entry->AddSuccessor(block);
block->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean,
+ MemberOffset(42), false));
block->AddInstruction(new (&allocator) HGoto());
HBasicBlock* loop_header = new (&allocator) HBasicBlock(graph);
@@ -148,22 +157,25 @@ TEST(GVNTest, LoopFieldElimination) {
loop_body->AddSuccessor(loop_header);
loop_header->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean,
+ MemberOffset(42), false));
HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction();
loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction()));
// Kill inside the loop body to prevent field gets inside the loop header
// and the body to be GVN'ed.
loop_body->AddInstruction(new (&allocator) HInstanceFieldSet(
- parameter, parameter, Primitive::kPrimNot, MemberOffset(42)));
+ parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false));
HInstruction* field_set = loop_body->GetLastInstruction();
loop_body->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean,
+ MemberOffset(42), false));
HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction();
loop_body->AddInstruction(new (&allocator) HGoto());
exit->AddInstruction(
- new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean, MemberOffset(42)));
+ new (&allocator) HInstanceFieldGet(parameter, Primitive::kPrimBoolean,
+ MemberOffset(42), false));
HInstruction* field_get_in_exit = exit->GetLastInstruction();
exit->AddInstruction(new (&allocator) HExit());
@@ -242,7 +254,7 @@ TEST(GVNTest, LoopSideEffects) {
{
// Make one block with a side effect.
entry->AddInstruction(new (&allocator) HInstanceFieldSet(
- parameter, parameter, Primitive::kPrimNot, MemberOffset(42)));
+ parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false));
GlobalValueNumberer gvn(&allocator, graph);
gvn.Run();
@@ -256,7 +268,7 @@ TEST(GVNTest, LoopSideEffects) {
{
outer_loop_body->InsertInstructionBefore(
new (&allocator) HInstanceFieldSet(
- parameter, parameter, Primitive::kPrimNot, MemberOffset(42)),
+ parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false),
outer_loop_body->GetLastInstruction());
GlobalValueNumberer gvn(&allocator, graph);
@@ -273,7 +285,7 @@ TEST(GVNTest, LoopSideEffects) {
outer_loop_body->RemoveInstruction(outer_loop_body->GetFirstInstruction());
inner_loop_body->InsertInstructionBefore(
new (&allocator) HInstanceFieldSet(
- parameter, parameter, Primitive::kPrimNot, MemberOffset(42)),
+ parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false),
inner_loop_body->GetLastInstruction());
GlobalValueNumberer gvn(&allocator, graph);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 1de5b78121..532167c179 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -27,6 +27,7 @@
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
+#include "register_allocator.h"
#include "ssa_phi_elimination.h"
#include "scoped_thread_state_change.h"
#include "thread.h"
@@ -43,10 +44,10 @@ void HInliner::Run() {
instr_it.Advance()) {
HInvokeStaticOrDirect* current = instr_it.Current()->AsInvokeStaticOrDirect();
if (current != nullptr) {
- if (!TryInline(current, current->GetIndexInDexCache(), current->GetInvokeType())) {
+ if (!TryInline(current, current->GetDexMethodIndex(), current->GetInvokeType())) {
if (kIsDebugBuild) {
std::string callee_name =
- PrettyMethod(current->GetIndexInDexCache(), *outer_compilation_unit_.GetDexFile());
+ PrettyMethod(current->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
bool should_inline = callee_name.find("$inline$") != std::string::npos;
CHECK(!should_inline) << "Could not inline " << callee_name;
}
@@ -143,6 +144,13 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
return false;
}
+ if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
+ compiler_driver_->GetInstructionSet())) {
+ VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file)
+ << " cannot be inlined because of the register allocator";
+ return false;
+ }
+
if (!callee_graph->TryBuildingSsa()) {
VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file)
<< " could not be transformed to SSA";
@@ -200,6 +208,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
}
callee_graph->InlineInto(graph_, invoke_instruction);
+
+ // Now that we have inlined the callee, we need to update the next
+ // instruction id of the caller, so that new instructions added
+ // after optimizations get a unique id.
+ graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId());
VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file);
outer_stats_->RecordStat(kInlinedInvoke);
return true;
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
new file mode 100644
index 0000000000..fe0e7f2eb2
--- /dev/null
+++ b/compiler/optimizing/intrinsics.cc
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics.h"
+
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "driver/compiler_driver.h"
+#include "invoke_type.h"
+#include "nodes.h"
+#include "quick/inline_method_analyser.h"
+
+namespace art {
+
+// Function that returns whether an intrinsic is static/direct or virtual.
+static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) {
+ switch (i) {
+ case Intrinsics::kNone:
+ return kInterface;  // Nonsensical for an intrinsic.
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ case Intrinsics::k ## Name: \
+ return IsStatic;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+ }
+ return kInterface;
+}
+
+
+static Primitive::Type GetType(uint64_t data, bool is_op_size) {
+ if (is_op_size) {
+ switch (static_cast<OpSize>(data)) {
+ case kSignedByte:
+ return Primitive::Type::kPrimByte;
+ case kSignedHalf:
+ return Primitive::Type::kPrimShort;
+ case k32:
+ return Primitive::Type::kPrimInt;
+ case k64:
+ return Primitive::Type::kPrimLong;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << data;
+ UNREACHABLE();
+ }
+ } else {
+ if ((data & kIntrinsicFlagIsLong) != 0) {
+ return Primitive::Type::kPrimLong;
+ }
+ if ((data & kIntrinsicFlagIsObject) != 0) {
+ return Primitive::Type::kPrimNot;
+ }
+ return Primitive::Type::kPrimInt;
+ }
+}
+
+static Intrinsics GetIntrinsic(InlineMethod method) {
+ switch (method.opcode) {
+ // Floating-point conversions.
+ case kIntrinsicDoubleCvt:
+ return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
+ Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble;
+ case kIntrinsicFloatCvt:
+ return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
+ Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat;
+
+ // Bit manipulations.
+ case kIntrinsicReverseBits:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kIntegerReverse;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kLongReverse;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+ case kIntrinsicReverseBytes:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::Type::kPrimShort:
+ return Intrinsics::kShortReverseBytes;
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kIntegerReverseBytes;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kLongReverseBytes;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+
+ // Abs.
+ case kIntrinsicAbsDouble:
+ return Intrinsics::kMathAbsDouble;
+ case kIntrinsicAbsFloat:
+ return Intrinsics::kMathAbsFloat;
+ case kIntrinsicAbsInt:
+ return Intrinsics::kMathAbsInt;
+ case kIntrinsicAbsLong:
+ return Intrinsics::kMathAbsLong;
+
+ // Min/max.
+ case kIntrinsicMinMaxDouble:
+ return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+ Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble;
+ case kIntrinsicMinMaxFloat:
+ return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+ Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat;
+ case kIntrinsicMinMaxInt:
+ return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+ Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt;
+ case kIntrinsicMinMaxLong:
+ return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+ Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong;
+
+ // Misc math.
+ case kIntrinsicSqrt:
+ return Intrinsics::kMathSqrt;
+ case kIntrinsicCeil:
+ return Intrinsics::kMathCeil;
+ case kIntrinsicFloor:
+ return Intrinsics::kMathFloor;
+ case kIntrinsicRint:
+ return Intrinsics::kMathRint;
+ case kIntrinsicRoundDouble:
+ return Intrinsics::kMathRoundDouble;
+ case kIntrinsicRoundFloat:
+ return Intrinsics::kMathRoundFloat;
+
+ // System.arraycopy.
+ case kIntrinsicSystemArrayCopyCharArray:
+ return Intrinsics::kSystemArrayCopyChar;
+
+ // Thread.currentThread.
+ case kIntrinsicCurrentThread:
+ return Intrinsics::kThreadCurrentThread;
+
+ // Memory.peek.
+ case kIntrinsicPeek:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::Type::kPrimByte:
+ return Intrinsics::kMemoryPeekByte;
+ case Primitive::Type::kPrimShort:
+ return Intrinsics::kMemoryPeekShortNative;
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kMemoryPeekIntNative;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kMemoryPeekLongNative;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+
+ // Memory.poke.
+ case kIntrinsicPoke:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::Type::kPrimByte:
+ return Intrinsics::kMemoryPokeByte;
+ case Primitive::Type::kPrimShort:
+ return Intrinsics::kMemoryPokeShortNative;
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kMemoryPokeIntNative;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kMemoryPokeLongNative;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+
+ // String.
+ case kIntrinsicCharAt:
+ return Intrinsics::kStringCharAt;
+ case kIntrinsicCompareTo:
+ return Intrinsics::kStringCompareTo;
+ case kIntrinsicIsEmptyOrLength:
+ return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ?
+ Intrinsics::kStringLength : Intrinsics::kStringIsEmpty;
+ case kIntrinsicIndexOf:
+ return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
+ Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
+
+ case kIntrinsicCas:
+ switch (GetType(method.d.data, false)) {
+ case Primitive::Type::kPrimNot:
+ return Intrinsics::kUnsafeCASObject;
+ case Primitive::Type::kPrimInt:
+ return Intrinsics::kUnsafeCASInt;
+ case Primitive::Type::kPrimLong:
+ return Intrinsics::kUnsafeCASLong;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+ case kIntrinsicUnsafeGet: {
+ const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile) != 0;
+ switch (GetType(method.d.data, false)) {
+ case Primitive::Type::kPrimInt:
+ return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet;
+ case Primitive::Type::kPrimLong:
+ return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+ }
+ case kIntrinsicUnsafePut: {
+ enum Sync { kNoSync, kVolatile, kOrdered };
+ const Sync sync =
+ ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile :
+ ((method.d.data & kIntrinsicFlagIsOrdered) != 0) ? kOrdered :
+ kNoSync;
+ switch (GetType(method.d.data, false)) {
+ case Primitive::Type::kPrimInt:
+ switch (sync) {
+ case kNoSync:
+ return Intrinsics::kUnsafePut;
+ case kVolatile:
+ return Intrinsics::kUnsafePutVolatile;
+ case kOrdered:
+ return Intrinsics::kUnsafePutOrdered;
+ }
+ break;
+ case Primitive::Type::kPrimLong:
+ switch (sync) {
+ case kNoSync:
+ return Intrinsics::kUnsafePutLong;
+ case kVolatile:
+ return Intrinsics::kUnsafePutLongVolatile;
+ case kOrdered:
+ return Intrinsics::kUnsafePutLongOrdered;
+ }
+ break;
+ case Primitive::Type::kPrimNot:
+ switch (sync) {
+ case kNoSync:
+ return Intrinsics::kUnsafePutObject;
+ case kVolatile:
+ return Intrinsics::kUnsafePutObjectVolatile;
+ case kOrdered:
+ return Intrinsics::kUnsafePutObjectOrdered;
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
+ break;
+ }
+
+ // Virtual cases.
+
+ case kIntrinsicReferenceGetReferent:
+ return Intrinsics::kReferenceGetReferent;
+
+ // Quick inliner cases. Remove after refactoring. They are here so that we can use the
+ // compiler to warn on missing cases.
+
+ case kInlineOpNop:
+ case kInlineOpReturnArg:
+ case kInlineOpNonWideConst:
+ case kInlineOpIGet:
+ case kInlineOpIPut:
+ return Intrinsics::kNone;
+
+ // No default case to make the compiler warn on missing cases.
+ }
+ return Intrinsics::kNone;
+}
+
+static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) {
+ // The DexFileMethodInliner should have checked that the methods agree with what we
+ // expect, i.e., that static methods are called as such. Add another check here for
+ // our expectations:
+ // Whenever the intrinsic is marked as static-or-direct, report an error if we find an
+ // InvokeVirtual. The other direction is not an error: we have intrinsics for virtual
+ // functions that perform a check inline; if the precise type is known, however, the
+ // instruction will be sharpened to an InvokeStaticOrDirect.
+ InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
+ InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
+ invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+ invoke->IsInvokeVirtual() ? kVirtual : kSuper;
+ switch (intrinsic_type) {
+ case kStatic:
+ return (invoke_type == kStatic);
+ case kDirect:
+ return (invoke_type == kDirect);
+ case kVirtual:
+ // Call might be devirtualized.
+ return (invoke_type == kVirtual || invoke_type == kDirect);
+
+ default:
+ return false;
+ }
+}
+
+// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod.
+void IntrinsicsRecognizer::Run() {
+ DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(dex_file_);
+ DCHECK(inliner != nullptr);
+
+ for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
+ inst_it.Advance()) {
+ HInstruction* inst = inst_it.Current();
+ if (inst->IsInvoke()) {
+ HInvoke* invoke = inst->AsInvoke();
+ InlineMethod method;
+ if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
+ Intrinsics intrinsic = GetIntrinsic(method);
+
+ if (intrinsic != Intrinsics::kNone) {
+ if (!CheckInvokeType(intrinsic, invoke)) {
+ LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
+ << intrinsic << " for "
+ << PrettyMethod(invoke->GetDexMethodIndex(), *dex_file_);
+ } else {
+ invoke->SetIntrinsic(intrinsic);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
+ switch (intrinsic) {
+ case Intrinsics::kNone:
+ os << "No intrinsic.";
+ break;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ case Intrinsics::k ## Name: \
+ os << # Name; \
+ break;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+ }
+ return os;
+}
+
+} // namespace art
+
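The recognizer runs as a regular HOptimization: it walks every block once, tags
recognized invokes via HInvoke::SetIntrinsic(), and later stages only need to
query HInvoke::GetIntrinsic(). A hedged sketch of how it is driven (the
surrounding pipeline variables are illustrative, not part of this change):

    // Illustrative: run recognition before code generation so the backend's
    // intrinsic locations builder and code generator see the tags.
    IntrinsicsRecognizer intrinsics(graph, &dex_file, compiler_driver);
    intrinsics.Run();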
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
new file mode 100644
index 0000000000..29cc8efcc3
--- /dev/null
+++ b/compiler/optimizing/intrinsics.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CompilerDriver;
+class DexFile;
+
+// Recognize intrinsics from HInvoke nodes.
+class IntrinsicsRecognizer : public HOptimization {
+ public:
+ IntrinsicsRecognizer(HGraph* graph, const DexFile* dex_file, CompilerDriver* driver)
+ : HOptimization(graph, true, "intrinsics_recognition"),
+ dex_file_(dex_file), driver_(driver) {}
+
+ void Run() OVERRIDE;
+
+ private:
+ const DexFile* dex_file_;
+ CompilerDriver* driver_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer);
+};
+
+class IntrinsicVisitor : public ValueObject {
+ public:
+ virtual ~IntrinsicVisitor() {}
+
+ // Dispatch logic.
+
+ void Dispatch(HInvoke* invoke) {
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kNone:
+ return;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ case Intrinsics::k ## Name: \
+ Visit ## Name(invoke); \
+ return;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ // Do not put a default case. That way the compiler will complain if we missed a case.
+ }
+ }
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+ }
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ protected:
+ IntrinsicVisitor() {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
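A backend hooks in by subclassing IntrinsicVisitor and overriding only the
intrinsics it implements; the rest fall through to the empty defaults above, so
those invokes are compiled as regular calls. A minimal sketch (the class name
and body are illustrative):

    // Illustrative only: handles a single intrinsic, ignores all others.
    class ExampleIntrinsicVisitor : public IntrinsicVisitor {
     public:
      void VisitMathAbsInt(HInvoke* invoke ATTRIBUTE_UNUSED) OVERRIDE {
        // Set up locations or emit the abs code here.
      }
    };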
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
new file mode 100644
index 0000000000..29ca20cca0
--- /dev/null
+++ b/compiler/optimizing/intrinsics_list.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+
+// All intrinsics supported by the optimizing compiler. Format is the intrinsic name, then the
+// expected invoke type: kStatic or kDirect for an HInvokeStaticOrDirect node, kVirtual for an
+// HInvokeVirtual node.
+
+#define INTRINSICS_LIST(V) \
+ V(DoubleDoubleToRawLongBits, kStatic) \
+ V(DoubleLongBitsToDouble, kStatic) \
+ V(FloatFloatToRawIntBits, kStatic) \
+ V(FloatIntBitsToFloat, kStatic) \
+ V(IntegerReverse, kStatic) \
+ V(IntegerReverseBytes, kStatic) \
+ V(LongReverse, kStatic) \
+ V(LongReverseBytes, kStatic) \
+ V(ShortReverseBytes, kStatic) \
+ V(MathAbsDouble, kStatic) \
+ V(MathAbsFloat, kStatic) \
+ V(MathAbsLong, kStatic) \
+ V(MathAbsInt, kStatic) \
+ V(MathMinDoubleDouble, kStatic) \
+ V(MathMinFloatFloat, kStatic) \
+ V(MathMinLongLong, kStatic) \
+ V(MathMinIntInt, kStatic) \
+ V(MathMaxDoubleDouble, kStatic) \
+ V(MathMaxFloatFloat, kStatic) \
+ V(MathMaxLongLong, kStatic) \
+ V(MathMaxIntInt, kStatic) \
+ V(MathSqrt, kStatic) \
+ V(MathCeil, kStatic) \
+ V(MathFloor, kStatic) \
+ V(MathRint, kStatic) \
+ V(MathRoundDouble, kStatic) \
+ V(MathRoundFloat, kStatic) \
+ V(SystemArrayCopyChar, kStatic) \
+ V(ThreadCurrentThread, kStatic) \
+ V(MemoryPeekByte, kStatic) \
+ V(MemoryPeekIntNative, kStatic) \
+ V(MemoryPeekLongNative, kStatic) \
+ V(MemoryPeekShortNative, kStatic) \
+ V(MemoryPokeByte, kStatic) \
+ V(MemoryPokeIntNative, kStatic) \
+ V(MemoryPokeLongNative, kStatic) \
+ V(MemoryPokeShortNative, kStatic) \
+ V(StringCharAt, kDirect) \
+ V(StringCompareTo, kDirect) \
+ V(StringIsEmpty, kDirect) \
+ V(StringIndexOf, kDirect) \
+ V(StringIndexOfAfter, kDirect) \
+ V(StringLength, kDirect) \
+ V(UnsafeCASInt, kDirect) \
+ V(UnsafeCASLong, kDirect) \
+ V(UnsafeCASObject, kDirect) \
+ V(UnsafeGet, kDirect) \
+ V(UnsafeGetVolatile, kDirect) \
+ V(UnsafeGetLong, kDirect) \
+ V(UnsafeGetLongVolatile, kDirect) \
+ V(UnsafePut, kDirect) \
+ V(UnsafePutOrdered, kDirect) \
+ V(UnsafePutVolatile, kDirect) \
+ V(UnsafePutObject, kDirect) \
+ V(UnsafePutObjectOrdered, kDirect) \
+ V(UnsafePutObjectVolatile, kDirect) \
+ V(UnsafePutLong, kDirect) \
+ V(UnsafePutLongOrdered, kDirect) \
+ V(UnsafePutLongVolatile, kDirect) \
+ \
+ V(ReferenceGetReferent, kVirtual)
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // #define is only for lint.
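The list is an X-macro: a client defines the V callback, includes this header,
and expands INTRINSICS_LIST over it, as GetIntrinsicInvokeType and the visitor
dispatch above already do. The Intrinsics enumeration used throughout this
change is generated the same way (a sketch of the pattern; in this change the
enum itself is declared in nodes.h):

    enum class Intrinsics {
      kNone,
    #define OPTIMIZING_INTRINSICS(Name, IsStatic) k ## Name,
    #include "intrinsics_list.h"
    INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
    #undef INTRINSICS_LIST
    #undef OPTIMIZING_INTRINSICS
    };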
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
new file mode 100644
index 0000000000..2c239458f1
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -0,0 +1,984 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_x86_64.h"
+
+#include "code_generator_x86_64.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/x86_64/assembler_x86_64.h"
+#include "utils/x86_64/constants_x86_64.h"
+
+namespace art {
+
+namespace x86_64 {
+
+static constexpr bool kIntrinsified = true;
+
+X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
+ return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetArena() {
+ return codegen_->GetGraph()->GetArena();
+}
+
+bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
+ Dispatch(invoke);
+ const LocationSummary* res = invoke->GetLocations();
+ return res != nullptr && res->Intrinsified();
+}
+
+#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
+
+// TODO: trg as memory.
+static void MoveFromReturnRegister(Location trg,
+ Primitive::Type type,
+ CodeGeneratorX86_64* codegen) {
+ if (!trg.IsValid()) {
+ DCHECK(type == Primitive::kPrimVoid);
+ return;
+ }
+
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot: {
+ CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+ if (trg_reg.AsRegister() != RAX) {
+ __ movl(trg_reg, CpuRegister(RAX));
+ }
+ break;
+ }
+ case Primitive::kPrimLong: {
+ CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+ if (trg_reg.AsRegister() != RAX) {
+ __ movq(trg_reg, CpuRegister(RAX));
+ }
+ break;
+ }
+
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected void type for valid location " << trg;
+ UNREACHABLE();
+
+ case Primitive::kPrimDouble: {
+ XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+ if (trg_reg.AsFloatRegister() != XMM0) {
+ __ movsd(trg_reg, XmmRegister(XMM0));
+ }
+ break;
+ }
+ case Primitive::kPrimFloat: {
+ XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+ if (trg_reg.AsFloatRegister() != XMM0) {
+ __ movss(trg_reg, XmmRegister(XMM0));
+ }
+ break;
+ }
+ }
+}
+
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
+ if (invoke->InputCount() == 0) {
+ return;
+ }
+
+ LocationSummary* locations = invoke->GetLocations();
+ InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+ // We're moving potentially two or more locations to locations that could overlap, so we need
+ // a parallel move resolver.
+ HParallelMove parallel_move(arena);
+
+ for (size_t i = 0; i < invoke->InputCount(); i++) {
+ HInstruction* input = invoke->InputAt(i);
+ Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+ Location actual_loc = locations->InAt(i);
+
+ parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+ }
+
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+// summary. If an intrinsic modifies those locations before a slowpath call, they must be
+// restored!
+class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
+ public:
+ explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
+ __ Bind(GetEntryLabel());
+
+ codegen->SaveLiveRegisters(invoke_->GetLocations());
+
+ MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+
+ if (invoke_->IsInvokeStaticOrDirect()) {
+ codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
+ } else {
+ UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+ UNREACHABLE();
+ }
+
+ // Copy the result back to the expected output.
+ Location out = invoke_->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory.
+ DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+ MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+ }
+
+ codegen->RestoreLiveRegisters(invoke_->GetLocations());
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ // The instruction where this slow path is happening.
+ HInvoke* const invoke_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
+};
+
+#undef __
+#define __ assembler->
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+ Primitive::Type size,
+ X86_64Assembler* assembler) {
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ switch (size) {
+ case Primitive::kPrimShort:
+ // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+ __ bswapl(out);
+ __ sarl(out, Immediate(16));
+ break;
+ case Primitive::kPrimInt:
+ __ bswapl(out);
+ break;
+ case Primitive::kPrimLong:
+ __ bswapq(out);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
+ UNREACHABLE();
+ }
+}
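+
+// For illustration, the kPrimShort case above in plain C++ (a sketch assuming
+// the GCC/Clang __builtin_bswap32 builtin and an arithmetic right shift, which
+// is what sarl provides):
+//
+//   int32_t ReverseShort(int32_t x) {
+//     // bswapl moves the 16 interesting bits into the top half of the word.
+//     uint32_t swapped = __builtin_bswap32(static_cast<uint32_t>(x));
+//     // sarl $16 shifts them back down and sign-extends, as a Java short needs.
+//     return static_cast<int32_t>(swapped) >> 16;
+//   }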
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+
+// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
+// need is 64b.
+
+static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+ // TODO: Enable memory operations when the assembler supports them.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ // TODO: Allow x86 to work with memory. This requires assembler support, see below.
+ // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
+ locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above.
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+ Location output = locations->Out();
+ CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ if (output.IsFpuRegister()) {
+ // In-register
+ XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+
+ if (is64bit) {
+ __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+ __ movd(xmm_temp, cpu_temp);
+ __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+ } else {
+ __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
+ __ movd(xmm_temp, cpu_temp);
+ __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+ }
+ } else {
+    // TODO: update when assembler support is available.
+ UNIMPLEMENTED(FATAL) << "Needs assembler support.";
+// Once assembler support is available, in-memory operations look like this:
+// if (is64bit) {
+// DCHECK(output.IsDoubleStackSlot());
+// // No 64b and with literal.
+// __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+// __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
+// } else {
+// DCHECK(output.IsStackSlot());
+// // Can use and with a literal directly.
+// __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
+// }
+ }
+}
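+
+// For illustration, the in-register path above in plain C++ (a sketch; needs
+// <cstring> and <cstdint>). Clearing the IEEE-754 sign bit is all Math.abs
+// needs, and it behaves correctly for NaN, -0.0, and infinities:
+//
+//   double AbsViaSignBit(double x) {
+//     uint64_t bits;
+//     std::memcpy(&bits, &x, sizeof(bits));  // movd gpr <-> xmm, conceptually.
+//     bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);  // The andpd above.
+//     std::memcpy(&x, &bits, sizeof(x));
+//     return x;
+//   }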
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+ CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+ CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+ Location output = locations->Out();
+ CpuRegister out = output.AsRegister<CpuRegister>();
+ CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ if (is64bit) {
+ // Create mask.
+ __ movq(mask, out);
+ __ sarq(mask, Immediate(63));
+ // Add mask.
+ __ addq(out, mask);
+ __ xorq(out, mask);
+ } else {
+ // Create mask.
+ __ movl(mask, out);
+ __ sarl(mask, Immediate(31));
+ // Add mask.
+ __ addl(out, mask);
+ __ xorl(out, mask);
+ }
+}
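+
+// For illustration, the mask trick above in plain C++ (a sketch; unsigned
+// arithmetic sidesteps signed-overflow UB for INT32_MIN, which, as in Java,
+// maps to itself):
+//
+//   int32_t AbsViaMask(int32_t x) {
+//     uint32_t ux = static_cast<uint32_t>(x);
+//     uint32_t mask = static_cast<uint32_t>(x >> 31);  // sarl: 0 or 0xFFFFFFFF.
+//     return static_cast<int32_t>((ux + mask) ^ mask); // addl then xorl.
+//   }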
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
+ CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
+ GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
+ CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
+ GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
+ X86_64Assembler* assembler) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if Nan jmp Nan_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // Nan_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slow path?
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ Label nan, done, op2_label;
+ if (is_double) {
+ __ ucomisd(out, op2);
+ } else {
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
+ if (is_min) {
+ if (is_double) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (is_double) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
+ if (is_double) {
+ __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+ } else {
+ __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+ }
+ __ movd(out, cpu_temp, is_double);
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (is_double) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
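+
+// For illustration, the Java Math.min semantics the code above implements
+// (a sketch; needs <cmath> and <limits>). The two special cases are exactly
+// the nan and 0.0/-0.0 branches in the assembly:
+//
+//   double JavaMin(double a, double b) {
+//     if (std::isnan(a) || std::isnan(b)) {
+//       return std::numeric_limits<double>::quiet_NaN();  // NaN wins.
+//     }
+//     if (a == b) {
+//       // +0.0 == -0.0, but min must prefer -0.0; the orpd of the sign bits
+//       // above achieves this without leaving the XMM domain.
+//       return std::signbit(a) ? a : b;
+//     }
+//     return a < b ? a : b;
+//   }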
+
+static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+  // The following is sub-optimal, but all we can do for now. It would be fine to also
+  // allow the second input to be the output (we could simply swap the inputs).
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+ CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+ CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+ CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+ CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+ GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
+ X86_64Assembler* assembler) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
+
+ if (is_long) {
+ __ cmpq(out, op2);
+ } else {
+ __ cmpl(out, op2);
+ }
+
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
+}
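+
+// For illustration, the integer path above in plain C++ (a sketch): without
+// NaN or signed-zero cases, a compare plus conditional move is enough.
+//
+//   int64_t Min64(int64_t op1, int64_t op2) {
+//     int64_t out = op1;         // out is SameAsFirstInput.
+//     if (out > op2) out = op2;  // cmpq + cmovg out, op2.
+//     return out;
+//   }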
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
+ CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+
+ GetAssembler()->sqrtsd(out, in);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
+ // The inputs plus one temp.
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Location of reference to data array
+ const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count
+ const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+ // Starting offset within data array
+ const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
+  // Start of char data within array_
+ const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
+
+ CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ Location temp_loc = locations->GetTemp(0);
+ CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+  // Note: the null check has already been done in an HNullCheck before this HInvokeVirtual.
+  // If/when we move to (coalesced) implicit checks, we have to do a null check below.
+ DCHECK(!kCoalescedImplicitNullCheck);
+
+ // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
+ // the cost.
+  // TODO: For simplicity, the index parameter is requested in a register, so, unlike Quick,
+  // we will not optimize the code for constants (which would save a register).
+
+ SlowPathCodeX86_64* slow_path = new (GetArena()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ X86_64Assembler* assembler = GetAssembler();
+
+ __ cmpl(idx, Address(obj, count_offset));
+ __ j(kAboveEqual, slow_path->GetEntryLabel());
+
+ // Get the actual element.
+ __ movl(temp, idx); // temp := idx.
+ __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx.
+  __ movl(out, Address(obj, value_offset));   // out := obj.array.
+ // out = out[2*temp].
+ __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+
+ __ Bind(slow_path->GetExitLabel());
+}
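+
+// For illustration, the fast path above in plain C++ (a sketch; the struct is
+// a hypothetical stand-in for the mirror::String layout implied by the offsets
+// above, and Throw() stands in for the IntrinsicSlowPathX86_64 branch):
+//
+//   struct JString { int32_t count; int32_t offset; uint16_t* value_data; };
+//   uint16_t CharAt(JString* s, uint32_t idx) {
+//     // cmpl + j(kAboveEqual): one unsigned compare also rejects idx < 0.
+//     if (idx >= static_cast<uint32_t>(s->count)) Throw();
+//     uint32_t pos = s->offset + idx;  // addl temp, [obj + offset_offset].
+//     return s->value_data[pos];       // movzxw out, [array + 2*pos + data].
+//   }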
+
+static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+ CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
+ // x86 allows unaligned access. We do not have to check the input or use specific instructions
+ // to avoid a SIGBUS.
+ switch (size) {
+ case Primitive::kPrimByte:
+ __ movsxb(out, Address(address, 0));
+ break;
+ case Primitive::kPrimShort:
+ __ movsxw(out, Address(address, 0));
+ break;
+ case Primitive::kPrimInt:
+ __ movl(out, Address(address, 0));
+ break;
+ case Primitive::kPrimLong:
+ __ movq(out, Address(address, 0));
+ break;
+ default:
+ LOG(FATAL) << "Type not recognized for peek: " << size;
+ UNREACHABLE();
+ }
+}
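+
+// Note: the byte and short cases above use movsx because Memory.peekByte and
+// Memory.peekShortNative return signed Java byte/short values; the int and
+// long cases can use plain full-width moves.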
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+ GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+}
+
+static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+ CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
+ // x86 allows unaligned access. We do not have to check the input or use specific instructions
+ // to avoid a SIGBUS.
+ switch (size) {
+ case Primitive::kPrimByte:
+ __ movb(Address(address, 0), value);
+ break;
+ case Primitive::kPrimShort:
+ __ movw(Address(address, 0), value);
+ break;
+ case Primitive::kPrimInt:
+ __ movl(Address(address, 0), value);
+ break;
+ case Primitive::kPrimLong:
+ __ movq(Address(address, 0), value);
+ break;
+ default:
+ LOG(FATAL) << "Type not recognized for poke: " << size;
+ UNREACHABLE();
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+ GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
+ CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
+ GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
+}
+
+static void GenUnsafeGet(LocationSummary* locations, bool is_long,
+ bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
+ CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+ CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
+
+ if (is_long) {
+ __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ } else {
+ // TODO: Distinguish object. In case we move to an actual compressed heap, retrieving an object
+ // pointer will entail an unpack operation.
+ __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ }
+}
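+
+// Note: is_volatile is deliberately unused above. Under the x86-64 memory
+// model every aligned load already has acquire semantics, so volatile gets
+// need no extra fence; only volatile puts do (see GenUnsafePut below).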
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
+ GenUnsafeGet(invoke->GetLocations(), false, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke->GetLocations(), false, true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
+ GenUnsafeGet(invoke->GetLocations(), true, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
+ Primitive::Type type,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+ if (type == Primitive::kPrimNot) {
+ // Need temp registers for card-marking.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
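+
+// Note: the two temps requested for kPrimNot above feed the card-marking write
+// barrier, conceptually card_table[obj >> kCardShift] = dirty, which needs one
+// register for the card-table base and one for the shifted object address.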
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+
+// Ordered variants need no extra code here: "ordered" requires an AnyStore barrier, which the
+// x86 memory model already provides.
+static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
+ CodeGeneratorX86_64* codegen) {
+ X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+ CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+ CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
+
+ if (type == Primitive::kPrimLong) {
+ __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
+ } else {
+ __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
+ }
+
+ if (is_volatile) {
+ __ mfence();
+ }
+
+ if (type == Primitive::kPrimNot) {
+ codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+ locations->GetTemp(1).AsRegister<CpuRegister>(),
+ base,
+ value);
+ }
+}
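+
+// For illustration, the barrier choice above mirrors what C++ atomics compile
+// to on x86-64 (a sketch; needs <atomic> and <cstdint>):
+//
+//   void Put(std::atomic<int32_t>* p, int32_t v, bool is_volatile) {
+//     if (is_volatile) {
+//       p->store(v, std::memory_order_seq_cst);  // mov + mfence (or xchg).
+//     } else {
+//       // Plain and ordered puts: a mov suffices, since x86 stores already
+//       // provide the AnyStore ordering mentioned above.
+//       p->store(v, std::memory_order_release);
+//     }
+//   }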
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+}
+
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name) \
+void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+} \
+void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}
+
+UNIMPLEMENTED_INTRINSIC(IntegerReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverse)
+UNIMPLEMENTED_INTRINSIC(MathFloor)
+UNIMPLEMENTED_INTRINSIC(MathCeil)
+UNIMPLEMENTED_INTRINSIC(MathRint)
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+UNIMPLEMENTED_INTRINSIC(StringIsEmpty)  // Might not want to do these two anyway, inlining should
+UNIMPLEMENTED_INTRINSIC(StringLength)   // be good enough here.
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+
+} // namespace x86_64
+} // namespace art
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
new file mode 100644
index 0000000000..c1fa99c2dc
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace x86_64 {
+
+class CodeGeneratorX86_64;
+class X86_64Assembler;
+
+class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+ // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+ // the invoke.
+ bool TryDispatch(HInvoke* invoke);
+
+ private:
+ ArenaAllocator* arena_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64);
+};
+
+class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+ X86_64Assembler* GetAssembler();
+
+ ArenaAllocator* GetArena();
+
+ CodeGeneratorX86_64* codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64);
+};
+
+} // namespace x86_64
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 59404dcb14..2ab9b571ff 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -22,6 +22,7 @@
#include "code_generator_x86.h"
#include "dex_file.h"
#include "dex_instruction.h"
+#include "driver/compiler_options.h"
#include "graph_visualizer.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
@@ -44,7 +45,7 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num
graph->TryBuildingSsa();
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 007c43e218..ff23eda21e 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -19,6 +19,7 @@
#include "code_generator_x86.h"
#include "dex_file.h"
#include "dex_instruction.h"
+#include "driver/compiler_options.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
@@ -63,7 +64,7 @@ TEST(LiveRangesTest, CFG1) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -109,7 +110,7 @@ TEST(LiveRangesTest, CFG2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -158,7 +159,7 @@ TEST(LiveRangesTest, CFG3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -235,7 +236,7 @@ TEST(LiveRangesTest, Loop1) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
RemoveSuspendChecks(graph);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -313,7 +314,7 @@ TEST(LiveRangesTest, Loop2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -389,7 +390,7 @@ TEST(LiveRangesTest, CFG4) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 6f706c391d..f2d49ac397 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -19,6 +19,7 @@
#include "code_generator_x86.h"
#include "dex_file.h"
#include "dex_instruction.h"
+#include "driver/compiler_options.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
@@ -51,7 +52,7 @@ static void TestCode(const uint16_t* data, const char* expected) {
graph->TryBuildingSsa();
// `Inline` conditions into ifs.
PrepareForRegisterAllocation(graph).Run();
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index ed5e260a5b..990d662d86 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -20,16 +20,19 @@
namespace art {
-LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind)
+LocationSummary::LocationSummary(HInstruction* instruction,
+ CallKind call_kind,
+ bool intrinsified)
: inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
environment_(instruction->GetBlock()->GetGraph()->GetArena(),
instruction->EnvironmentSize()),
- output_overlaps_(true),
+ output_overlaps_(Location::kOutputOverlap),
call_kind_(call_kind),
stack_mask_(nullptr),
register_mask_(0),
- live_registers_() {
+ live_registers_(),
+ intrinsified_(intrinsified) {
inputs_.SetSize(instruction->InputCount());
for (size_t i = 0; i < instruction->InputCount(); ++i) {
inputs_.Put(i, Location());
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 1ff26d914c..dda6c94a3d 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -37,7 +37,10 @@ std::ostream& operator<<(std::ostream& os, const Location& location);
*/
class Location : public ValueObject {
public:
- static constexpr bool kNoOutputOverlap = false;
+ enum OutputOverlap {
+ kOutputOverlap,
+ kNoOutputOverlap
+ };
enum Kind {
kInvalid = 0,
@@ -160,6 +163,16 @@ class Location : public ValueObject {
return GetPayload();
}
+ int low() const {
+ DCHECK(IsPair());
+ return GetPayload() >> 16;
+ }
+
+ int high() const {
+ DCHECK(IsPair());
+ return GetPayload() & 0xFFFF;
+ }
+
template <typename T>
T AsRegister() const {
DCHECK(IsRegister());
@@ -175,25 +188,41 @@ class Location : public ValueObject {
template <typename T>
T AsRegisterPairLow() const {
DCHECK(IsRegisterPair());
- return static_cast<T>(GetPayload() >> 16);
+ return static_cast<T>(low());
}
template <typename T>
T AsRegisterPairHigh() const {
DCHECK(IsRegisterPair());
- return static_cast<T>(GetPayload() & 0xFFFF);
+ return static_cast<T>(high());
}
template <typename T>
T AsFpuRegisterPairLow() const {
DCHECK(IsFpuRegisterPair());
- return static_cast<T>(GetPayload() >> 16);
+ return static_cast<T>(low());
}
template <typename T>
T AsFpuRegisterPairHigh() const {
DCHECK(IsFpuRegisterPair());
- return static_cast<T>(GetPayload() & 0xFFFF);
+ return static_cast<T>(high());
+ }
+
+ bool IsPair() const {
+ return IsRegisterPair() || IsFpuRegisterPair();
+ }
+
+ Location ToLow() const {
+ return IsRegisterPair()
+ ? Location::RegisterLocation(low())
+ : Location::FpuRegisterLocation(low());
+ }
+
+ Location ToHigh() const {
+ return IsRegisterPair()
+ ? Location::RegisterLocation(high())
+ : Location::FpuRegisterLocation(high());
}
static uintptr_t EncodeStackIndex(intptr_t stack_index) {
@@ -425,7 +454,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
kCall
};
- LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall);
+ LocationSummary(HInstruction* instruction,
+ CallKind call_kind = kNoCall,
+ bool intrinsified = false);
void SetInAt(uint32_t at, Location location) {
DCHECK(inputs_.Get(at).IsUnallocated() || inputs_.Get(at).IsInvalid());
@@ -440,7 +471,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
return inputs_.Size();
}
- void SetOut(Location location, bool overlaps = true) {
+ void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) {
DCHECK(output_.IsUnallocated() || output_.IsInvalid());
output_overlaps_ = overlaps;
output_ = location;
@@ -525,14 +556,19 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
&& (output_.GetPolicy() == Location::kSameAsFirstInput)) {
return false;
}
- if (inputs_.Get(input_index).IsRegister() || inputs_.Get(input_index).IsFpuRegister()) {
+ Location input = inputs_.Get(input_index);
+ if (input.IsRegister() || input.IsFpuRegister() || input.IsPair()) {
return false;
}
return true;
}
bool OutputOverlapsWithInputs() const {
- return output_overlaps_;
+ return output_overlaps_ == Location::kOutputOverlap;
+ }
+
+ bool Intrinsified() const {
+ return intrinsified_;
}
private:
@@ -541,7 +577,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
GrowableArray<Location> environment_;
// Whether the output overlaps with any of the inputs. If it overlaps, then it cannot
// share the same register as the inputs.
- bool output_overlaps_;
+ Location::OutputOverlap output_overlaps_;
Location output_;
const CallKind call_kind_;
@@ -554,6 +590,9 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
// Registers that are in use at this position.
RegisterSet live_registers_;
+ // Whether these are locations for an intrinsified call.
+ const bool intrinsified_;
+
ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
DISALLOW_COPY_AND_ASSIGN(LocationSummary);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 2b3ac1ac4a..2c84df4d7f 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -15,6 +15,7 @@
*/
#include "nodes.h"
+
#include "ssa_builder.h"
#include "utils/growable_array.h"
@@ -459,6 +460,22 @@ static void RemoveFromUseList(T* user,
}
}
+HInstruction* HInstruction::GetNextDisregardingMoves() const {
+ HInstruction* next = GetNext();
+ while (next != nullptr && next->IsParallelMove()) {
+ next = next->GetNext();
+ }
+ return next;
+}
+
+HInstruction* HInstruction::GetPreviousDisregardingMoves() const {
+ HInstruction* previous = GetPrevious();
+ while (previous != nullptr && previous->IsParallelMove()) {
+ previous = previous->GetPrevious();
+ }
+ return previous;
+}
+
void HInstruction::RemoveUser(HInstruction* user, size_t input_index) {
RemoveFromUseList(user, input_index, &uses_);
}
@@ -646,17 +663,18 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const {
} else if (GetLeft()->IsLongConstant() && GetRight()->IsLongConstant()) {
int64_t value = Evaluate(GetLeft()->AsLongConstant()->GetValue(),
GetRight()->AsLongConstant()->GetValue());
- return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value);
+ if (GetResultType() == Primitive::kPrimLong) {
+ return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value);
+ } else {
+ DCHECK_EQ(GetResultType(), Primitive::kPrimInt);
+ return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value);
+ }
}
return nullptr;
}
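+
+// Note (illustrative, not part of this change): the result-type check above
+// matters for instructions such as HCompare, whose inputs can be two long
+// constants while GetResultType() is kPrimInt; static evaluation must then
+// create an HIntConstant rather than an HLongConstant.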
bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const {
- HInstruction* previous = if_->GetPrevious();
- while (previous != nullptr && previous->IsParallelMove()) {
- previous = previous->GetPrevious();
- }
- return previous == this;
+ return this == if_->GetPreviousDisregardingMoves();
}
bool HInstruction::Equals(HInstruction* other) const {
@@ -753,13 +771,16 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
}
}
- // Finally, replace the invoke with the return value of the inlined graph.
+ // Replace the invoke with the return value of the inlined graph.
if (last->IsReturn()) {
invoke->ReplaceWith(last->InputAt(0));
body->RemoveInstruction(last);
} else {
DCHECK(last->IsReturnVoid());
}
+
+ // Finally remove the invoke from the caller.
+ invoke->GetBlock()->RemoveInstruction(invoke);
}
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 601d45e56f..e19bfce9de 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -697,6 +697,9 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
HInstruction* GetNext() const { return next_; }
HInstruction* GetPrevious() const { return previous_; }
+ HInstruction* GetNextDisregardingMoves() const;
+ HInstruction* GetPreviousDisregardingMoves() const;
+
HBasicBlock* GetBlock() const { return block_; }
void SetBlock(HBasicBlock* block) { block_ = block; }
bool IsInBlock() const { return block_ != nullptr; }
@@ -717,6 +720,8 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
virtual bool CanThrow() const { return false; }
bool HasSideEffects() const { return side_effects_.HasSideEffects(); }
+ virtual bool CanDoImplicitNullCheck() const { return false; }
+
void AddUseAt(HInstruction* user, size_t index) {
uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HInstruction>(user, index, uses_);
}
@@ -1581,25 +1586,24 @@ class HLongConstant : public HConstant {
DISALLOW_COPY_AND_ASSIGN(HLongConstant);
};
+enum class Intrinsics {
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) k ## Name,
+#include "intrinsics_list.h"
+ kNone,
+ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+};
+std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic);
+
class HInvoke : public HInstruction {
public:
- HInvoke(ArenaAllocator* arena,
- uint32_t number_of_arguments,
- Primitive::Type return_type,
- uint32_t dex_pc)
- : HInstruction(SideEffects::All()),
- inputs_(arena, number_of_arguments),
- return_type_(return_type),
- dex_pc_(dex_pc) {
- inputs_.SetSize(number_of_arguments);
- }
-
virtual size_t InputCount() const { return inputs_.Size(); }
virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); }
// Runtime needs to walk the stack, so Dex -> Dex calls need to
// know their environment.
- virtual bool NeedsEnvironment() const { return true; }
+ bool NeedsEnvironment() const OVERRIDE { return true; }
void SetArgumentAt(size_t index, HInstruction* argument) {
SetRawInputAt(index, argument);
@@ -1613,12 +1617,38 @@ class HInvoke : public HInstruction {
uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexMethodIndex() const { return dex_method_index_; }
+
+ Intrinsics GetIntrinsic() {
+ return intrinsic_;
+ }
+
+ void SetIntrinsic(Intrinsics intrinsic) {
+ intrinsic_ = intrinsic;
+ }
+
DECLARE_INSTRUCTION(Invoke);
protected:
+ HInvoke(ArenaAllocator* arena,
+ uint32_t number_of_arguments,
+ Primitive::Type return_type,
+ uint32_t dex_pc,
+ uint32_t dex_method_index)
+ : HInstruction(SideEffects::All()),
+ inputs_(arena, number_of_arguments),
+ return_type_(return_type),
+ dex_pc_(dex_pc),
+ dex_method_index_(dex_method_index),
+ intrinsic_(Intrinsics::kNone) {
+ inputs_.SetSize(number_of_arguments);
+ }
+
GrowableArray<HInstruction*> inputs_;
const Primitive::Type return_type_;
const uint32_t dex_pc_;
+ const uint32_t dex_method_index_;
+ Intrinsics intrinsic_;
private:
DISALLOW_COPY_AND_ASSIGN(HInvoke);
@@ -1630,19 +1660,22 @@ class HInvokeStaticOrDirect : public HInvoke {
uint32_t number_of_arguments,
Primitive::Type return_type,
uint32_t dex_pc,
- uint32_t index_in_dex_cache,
+ uint32_t dex_method_index,
InvokeType invoke_type)
- : HInvoke(arena, number_of_arguments, return_type, dex_pc),
- index_in_dex_cache_(index_in_dex_cache),
+ : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
invoke_type_(invoke_type) {}
- uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; }
+ bool CanDoImplicitNullCheck() const OVERRIDE {
+ // We access the method via the dex cache so we can't do an implicit null check.
+ // TODO: for intrinsics we can generate implicit null checks.
+ return false;
+ }
+
InvokeType GetInvokeType() const { return invoke_type_; }
DECLARE_INSTRUCTION(InvokeStaticOrDirect);
private:
- const uint32_t index_in_dex_cache_;
const InvokeType invoke_type_;
DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
@@ -1654,10 +1687,16 @@ class HInvokeVirtual : public HInvoke {
uint32_t number_of_arguments,
Primitive::Type return_type,
uint32_t dex_pc,
+ uint32_t dex_method_index,
uint32_t vtable_index)
- : HInvoke(arena, number_of_arguments, return_type, dex_pc),
+ : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
vtable_index_(vtable_index) {}
+ bool CanDoImplicitNullCheck() const OVERRIDE {
+ // TODO: Add implicit null checks in intrinsics.
+ return !GetLocations()->Intrinsified();
+ }
+
uint32_t GetVTableIndex() const { return vtable_index_; }
DECLARE_INSTRUCTION(InvokeVirtual);
@@ -1676,17 +1715,20 @@ class HInvokeInterface : public HInvoke {
uint32_t dex_pc,
uint32_t dex_method_index,
uint32_t imt_index)
- : HInvoke(arena, number_of_arguments, return_type, dex_pc),
- dex_method_index_(dex_method_index),
+ : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
imt_index_(imt_index) {}
+ bool CanDoImplicitNullCheck() const OVERRIDE {
+ // TODO: Add implicit null checks in intrinsics.
+ return !GetLocations()->Intrinsified();
+ }
+
uint32_t GetImtIndex() const { return imt_index_; }
uint32_t GetDexMethodIndex() const { return dex_method_index_; }
DECLARE_INSTRUCTION(InvokeInterface);
private:
- const uint32_t dex_method_index_;
const uint32_t imt_index_;
DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
@@ -2129,39 +2171,49 @@ class HNullCheck : public HExpression<1> {
class FieldInfo : public ValueObject {
public:
- FieldInfo(MemberOffset field_offset, Primitive::Type field_type)
- : field_offset_(field_offset), field_type_(field_type) {}
+ FieldInfo(MemberOffset field_offset, Primitive::Type field_type, bool is_volatile)
+ : field_offset_(field_offset), field_type_(field_type), is_volatile_(is_volatile) {}
MemberOffset GetFieldOffset() const { return field_offset_; }
Primitive::Type GetFieldType() const { return field_type_; }
+ bool IsVolatile() const { return is_volatile_; }
private:
const MemberOffset field_offset_;
const Primitive::Type field_type_;
+ const bool is_volatile_;
};
class HInstanceFieldGet : public HExpression<1> {
public:
HInstanceFieldGet(HInstruction* value,
Primitive::Type field_type,
- MemberOffset field_offset)
+ MemberOffset field_offset,
+ bool is_volatile)
: HExpression(field_type, SideEffects::DependsOnSomething()),
- field_info_(field_offset, field_type) {
+ field_info_(field_offset, field_type, is_volatile) {
SetRawInputAt(0, value);
}
- virtual bool CanBeMoved() const { return true; }
- virtual bool InstructionDataEquals(HInstruction* other) const {
- size_t other_offset = other->AsInstanceFieldGet()->GetFieldOffset().SizeValue();
- return other_offset == GetFieldOffset().SizeValue();
+ bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
+
+ bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+ HInstanceFieldGet* other_get = other->AsInstanceFieldGet();
+ return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
}
- virtual size_t ComputeHashCode() const {
+ bool CanDoImplicitNullCheck() const OVERRIDE {
+ return GetFieldOffset().Uint32Value() < kPageSize;
+ }
+
+ size_t ComputeHashCode() const OVERRIDE {
return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue();
}
+ const FieldInfo& GetFieldInfo() const { return field_info_; }
MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); }
+ bool IsVolatile() const { return field_info_.IsVolatile(); }
DECLARE_INSTRUCTION(InstanceFieldGet);
@@ -2176,16 +2228,22 @@ class HInstanceFieldSet : public HTemplateInstruction<2> {
HInstanceFieldSet(HInstruction* object,
HInstruction* value,
Primitive::Type field_type,
- MemberOffset field_offset)
+ MemberOffset field_offset,
+ bool is_volatile)
: HTemplateInstruction(SideEffects::ChangesSomething()),
- field_info_(field_offset, field_type) {
+ field_info_(field_offset, field_type, is_volatile) {
SetRawInputAt(0, object);
SetRawInputAt(1, value);
}
+ bool CanDoImplicitNullCheck() const OVERRIDE {
+ return GetFieldOffset().Uint32Value() < kPageSize;
+ }
+
+ const FieldInfo& GetFieldInfo() const { return field_info_; }
MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); }
-
+ bool IsVolatile() const { return field_info_.IsVolatile(); }
HInstruction* GetValue() const { return InputAt(1); }
DECLARE_INSTRUCTION(InstanceFieldSet);
@@ -2209,6 +2267,15 @@ class HArrayGet : public HExpression<2> {
UNUSED(other);
return true;
}
+ bool CanDoImplicitNullCheck() const OVERRIDE {
+ // TODO: We can be smarter here.
+ // Currently, the array access is always preceded by an ArrayLength or a NullCheck
+ // which generates the implicit null check. There are cases when these can be removed
+ // to produce better code. If we ever add optimizations to do so we should allow an
+ // implicit check here (as long as the address falls in the first page).
+ return false;
+ }
+
void SetType(Primitive::Type type) { type_ = type; }
HInstruction* GetArray() const { return InputAt(0); }
@@ -2236,12 +2303,17 @@ class HArraySet : public HTemplateInstruction<3> {
SetRawInputAt(2, value);
}
- bool NeedsEnvironment() const {
+ bool NeedsEnvironment() const OVERRIDE {
// We currently always call a runtime method to catch array store
// exceptions.
return needs_type_check_;
}
+ bool CanDoImplicitNullCheck() const OVERRIDE {
+ // TODO: Same as for ArrayGet.
+ return false;
+ }
+
void ClearNeedsTypeCheck() {
needs_type_check_ = false;
}
@@ -2284,11 +2356,12 @@ class HArrayLength : public HExpression<1> {
SetRawInputAt(0, array);
}
- virtual bool CanBeMoved() const { return true; }
- virtual bool InstructionDataEquals(HInstruction* other) const {
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
UNUSED(other);
return true;
}
+ bool CanDoImplicitNullCheck() const OVERRIDE { return true; }
DECLARE_INSTRUCTION(ArrayLength);
@@ -2497,24 +2570,29 @@ class HStaticFieldGet : public HExpression<1> {
public:
HStaticFieldGet(HInstruction* cls,
Primitive::Type field_type,
- MemberOffset field_offset)
+ MemberOffset field_offset,
+ bool is_volatile)
: HExpression(field_type, SideEffects::DependsOnSomething()),
- field_info_(field_offset, field_type) {
+ field_info_(field_offset, field_type, is_volatile) {
SetRawInputAt(0, cls);
}
- bool CanBeMoved() const OVERRIDE { return true; }
+
+ bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
+
bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
- size_t other_offset = other->AsStaticFieldGet()->GetFieldOffset().SizeValue();
- return other_offset == GetFieldOffset().SizeValue();
+ HStaticFieldGet* other_get = other->AsStaticFieldGet();
+ return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
}
size_t ComputeHashCode() const OVERRIDE {
return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue();
}
+ const FieldInfo& GetFieldInfo() const { return field_info_; }
MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); }
+ bool IsVolatile() const { return field_info_.IsVolatile(); }
DECLARE_INSTRUCTION(StaticFieldGet);
@@ -2529,15 +2607,18 @@ class HStaticFieldSet : public HTemplateInstruction<2> {
HStaticFieldSet(HInstruction* cls,
HInstruction* value,
Primitive::Type field_type,
- MemberOffset field_offset)
+ MemberOffset field_offset,
+ bool is_volatile)
: HTemplateInstruction(SideEffects::ChangesSomething()),
- field_info_(field_offset, field_type) {
+ field_info_(field_offset, field_type, is_volatile) {
SetRawInputAt(0, cls);
SetRawInputAt(1, value);
}
+ const FieldInfo& GetFieldInfo() const { return field_info_; }
MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); }
+ bool IsVolatile() const { return field_info_.IsVolatile(); }
HInstruction* GetValue() const { return InputAt(1); }
@@ -2678,7 +2759,7 @@ class HMonitorOperation : public HTemplateInstruction<1> {
DECLARE_INSTRUCTION(MonitorOperation);
- protected:
+ private:
const OperationKind kind_;
const uint32_t dex_pc_;
@@ -2686,7 +2767,6 @@ class HMonitorOperation : public HTemplateInstruction<1> {
DISALLOW_COPY_AND_ASSIGN(HMonitorOperation);
};
-
class MoveOperands : public ArenaObject<kArenaAllocMisc> {
public:
MoveOperands(Location source, Location destination, HInstruction* instruction)
@@ -2748,8 +2828,6 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> {
// This is only used in debug mode, to ensure we do not connect interval siblings
// in the same parallel move.
HInstruction* instruction_;
-
- DISALLOW_COPY_AND_ASSIGN(MoveOperands);
};
static constexpr size_t kDefaultNumberOfMoves = 4;
@@ -2759,18 +2837,53 @@ class HParallelMove : public HTemplateInstruction<0> {
explicit HParallelMove(ArenaAllocator* arena)
: HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {}
- void AddMove(MoveOperands* move) {
- if (kIsDebugBuild && move->GetInstruction() != nullptr) {
- for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
- DCHECK_NE(moves_.Get(i)->GetInstruction(), move->GetInstruction())
- << "Doing parallel moves for the same instruction.";
+ void AddMove(Location source, Location destination, HInstruction* instruction) {
+ DCHECK(source.IsValid());
+ DCHECK(destination.IsValid());
+ // The parallel move resolver does not handle pairs. So we decompose the
+ // pair locations into two moves.
+ if (source.IsPair() && destination.IsPair()) {
+ AddMove(source.ToLow(), destination.ToLow(), instruction);
+ AddMove(source.ToHigh(), destination.ToHigh(), nullptr);
+ } else if (source.IsPair()) {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction);
+ AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr);
+ } else if (destination.IsPair()) {
+ if (source.IsConstant()) {
+ // We put the same constant in the move. The code generator will handle which
+ // low or high part to use.
+ AddMove(source, destination.ToLow(), instruction);
+ AddMove(source, destination.ToHigh(), nullptr);
+ } else {
+ DCHECK(source.IsDoubleStackSlot());
+ AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction);
+ // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to
+ // always be 4.
+ static constexpr int kHighOffset = 4;
+ AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)),
+ destination.ToHigh(),
+ nullptr);
+ }
+ } else {
+ if (kIsDebugBuild) {
+ if (instruction != nullptr) {
+ for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
+ DCHECK_NE(moves_.Get(i).GetInstruction(), instruction)
+ << "Doing parallel moves for the same instruction.";
+ }
+ }
+ for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
+ DCHECK(!destination.Equals(moves_.Get(i).GetDestination()))
+ << "Same destination for two moves in a parallel move.";
+ }
}
+ moves_.Add(MoveOperands(source, destination, instruction));
}
- moves_.Add(move);
}
MoveOperands* MoveOperandsAt(size_t index) const {
- return moves_.Get(index);
+ return moves_.GetRawStorage() + index;
}
size_t NumMoves() const { return moves_.Size(); }
@@ -2778,7 +2891,7 @@ class HParallelMove : public HTemplateInstruction<0> {
DECLARE_INSTRUCTION(ParallelMove);
private:
- GrowableArray<MoveOperands*> moves_;
+ GrowableArray<MoveOperands> moves_;
DISALLOW_COPY_AND_ASSIGN(HParallelMove);
};
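
To make the decomposition in AddMove concrete: a pair-to-pair move becomes two
register moves, a pair-to-memory move targets two consecutive word slots (high
word at +4), and a constant-to-pair move duplicates the constant on both
halves. Below is a minimal standalone sketch of the same case analysis; Loc,
Move and Decompose are illustrative stand-ins, not ART types.

    #include <cassert>
    #include <vector>

    enum class Kind { kReg, kPair, kStackSlot, kDoubleStackSlot, kConstant };

    struct Loc {
      Kind kind;
      int low;   // register index, stack index, or constant id
      int high;  // second register of a pair; unused otherwise
    };

    struct Move { Loc src, dst; };

    // Mirrors the case analysis of HParallelMove::AddMove.
    void Decompose(Loc src, Loc dst, std::vector<Move>* out) {
      if (src.kind == Kind::kPair && dst.kind == Kind::kPair) {
        out->push_back({{Kind::kReg, src.low, 0}, {Kind::kReg, dst.low, 0}});
        out->push_back({{Kind::kReg, src.high, 0}, {Kind::kReg, dst.high, 0}});
      } else if (src.kind == Kind::kPair) {
        assert(dst.kind == Kind::kDoubleStackSlot);
        out->push_back({{Kind::kReg, src.low, 0}, {Kind::kStackSlot, dst.low, 0}});
        out->push_back({{Kind::kReg, src.high, 0}, {Kind::kStackSlot, dst.low + 4, 0}});
      } else if (dst.kind == Kind::kPair) {
        if (src.kind == Kind::kConstant) {
          // Same constant on both halves; the consumer picks the half it needs.
          out->push_back({src, {Kind::kReg, dst.low, 0}});
          out->push_back({src, {Kind::kReg, dst.high, 0}});
        } else {
          assert(src.kind == Kind::kDoubleStackSlot);
          out->push_back({{Kind::kStackSlot, src.low, 0}, {Kind::kReg, dst.low, 0}});
          out->push_back({{Kind::kStackSlot, src.low + 4, 0}, {Kind::kReg, dst.high, 0}});
        }
      } else {
        out->push_back({src, dst});
      }
    }

Decomposing {kPair, 0, 1} into {kPair, 2, 3} yields the two scalar moves
r0 -> r2 and r1 -> r3, which is exactly what the Pairs test below asserts.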
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index deebaf7414..1e0d65a945 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -25,6 +25,7 @@
#include "compiler.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "elf_writer_quick.h"
@@ -32,6 +33,7 @@
#include "gvn.h"
#include "inliner.h"
#include "instruction_simplifier.h"
+#include "intrinsics.h"
#include "jni/quick/jni_compiler.h"
#include "mirror/art_method-inl.h"
#include "nodes.h"
@@ -68,13 +70,8 @@ class CodeVectorAllocator FINAL : public CodeAllocator {
};
/**
- * If set to true, generates a file suitable for the c1visualizer tool and IRHydra.
- */
-static bool kIsVisualizerEnabled = false;
-
-/**
 * Filter to apply to the visualizer. Methods whose name contains the filter will
- * be in the file.
+ * be dumped.
*/
static const char* kStringFilter = "";
@@ -114,7 +111,7 @@ class OptimizingCompiler FINAL : public Compiler {
void InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const OVERRIDE {}
- void Init() const OVERRIDE {}
+ void Init() OVERRIDE;
void UnInit() const OVERRIDE {}
@@ -123,6 +120,18 @@ class OptimizingCompiler FINAL : public Compiler {
// just run the code generation after the graph was built.
const bool run_optimizations_;
+ // Optimize and compile `graph`.
+ CompiledMethod* CompileOptimized(HGraph* graph,
+ CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ const HGraphVisualizer& visualizer) const;
+
+ // Just compile without doing optimizations.
+ CompiledMethod* CompileBaseline(CodeGenerator* codegen,
+ CompilerDriver* driver,
+ const DexCompilationUnit& dex_compilation_unit) const;
+
mutable OptimizingCompilerStats compilation_stats_;
std::unique_ptr<std::ostream> visualizer_output_;
@@ -136,9 +145,18 @@ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
: Compiler(driver, kMaximumCompilationTimeBeforeWarning),
run_optimizations_(
driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime),
- compilation_stats_() {
- if (kIsVisualizerEnabled) {
- visualizer_output_.reset(new std::ofstream("art.cfg"));
+ compilation_stats_() {}
+
+void OptimizingCompiler::Init() {
+ // Enable C1visualizer output. Must be done in Init() because the compiler
+ // driver is not fully initialized when passed to the compiler's constructor.
+ CompilerDriver* driver = GetCompilerDriver();
+ const std::string cfg_file_name = driver->GetDumpCfgFileName();
+ if (!cfg_file_name.empty()) {
+ CHECK_EQ(driver->GetThreadCount(), 1U)
+ << "Graph visualizer requires the compiler to run single-threaded. "
+ << "Invoke the compiler with '-j1'.";
+ visualizer_output_.reset(new std::ofstream(cfg_file_name));
}
}
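
With this change the visualizer is driven entirely by the output file name
carried on the CompilerDriver (GetDumpCfgFileName()). Assuming the
corresponding dex2oat flag is spelled --dump-cfg, a typical invocation would be
`dex2oat --dump-cfg=art.cfg -j1 ...`, with -j1 required by the CHECK above.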
@@ -190,22 +208,27 @@ static void RunOptimizations(HGraph* graph,
SsaRedundantPhiElimination redundant_phi(graph);
SsaDeadPhiElimination dead_phi(graph);
HDeadCodeElimination dce(graph);
- HConstantFolding fold(graph);
+ HConstantFolding fold1(graph);
InstructionSimplifier simplify1(graph);
HInliner inliner(graph, dex_compilation_unit, driver, stats);
+ HConstantFolding fold2(graph);
GVNOptimization gvn(graph);
BoundsCheckElimination bce(graph);
InstructionSimplifier simplify2(graph);
+ IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver);
+
HOptimization* optimizations[] = {
&redundant_phi,
&dead_phi,
+ &intrinsics,
&dce,
- &fold,
+ &fold1,
&simplify1,
&inliner,
+ &fold2,
&gvn,
&bce,
&simplify2
@@ -213,21 +236,89 @@ static void RunOptimizations(HGraph* graph,
for (size_t i = 0; i < arraysize(optimizations); ++i) {
HOptimization* optimization = optimizations[i];
+ visualizer.DumpGraph(optimization->GetPassName(), /*is_after=*/false);
optimization->Run();
- visualizer.DumpGraph(optimization->GetPassName());
+ visualizer.DumpGraph(optimization->GetPassName(), /*is_after=*/true);
optimization->Check();
}
}
// The stack map we generate must be 4-byte aligned on ARM. Since existing
// maps are generated alongside these stack maps, we must also align them.
-static std::vector<uint8_t>& AlignVectorSize(std::vector<uint8_t>& vector) {
+static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) {
size_t size = vector.size();
size_t aligned_size = RoundUp(size, 4);
for (; size < aligned_size; ++size) {
vector.push_back(0);
}
- return vector;
+ return ArrayRef<const uint8_t>(vector);
+}
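
The helper zero-pads each table to the next multiple of four bytes before
wrapping its storage in an ArrayRef; a standalone sketch of the same
computation (PadTo4 is an illustrative name):

    #include <cstdint>
    #include <vector>

    // Zero-pad `v` to a multiple of 4 bytes, as AlignVectorSize does above.
    // E.g. a 5-byte mapping table grows to 8 bytes; an 8-byte one is unchanged.
    void PadTo4(std::vector<uint8_t>* v) {
      while (v->size() % 4 != 0) {
        v->push_back(0);
      }
    }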
+
+
+CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ const HGraphVisualizer& visualizer) const {
+ RunOptimizations(
+ graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer);
+
+ PrepareForRegisterAllocation(graph).Run();
+ SsaLivenessAnalysis liveness(*graph, codegen);
+ liveness.Analyze();
+ visualizer.DumpGraph(kLivenessPassName);
+
+ RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
+ register_allocator.AllocateRegisters();
+ visualizer.DumpGraph(kRegisterAllocatorPassName);
+
+ CodeVectorAllocator allocator;
+ codegen->CompileOptimized(&allocator);
+
+ std::vector<uint8_t> stack_map;
+ codegen->BuildStackMaps(&stack_map);
+
+ compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);
+
+ return CompiledMethod::SwapAllocCompiledMethodStackMap(
+ compiler_driver,
+ codegen->GetInstructionSet(),
+ ArrayRef<const uint8_t>(allocator.GetMemory()),
+ codegen->GetFrameSize(),
+ codegen->GetCoreSpillMask(),
+ 0, /* FPR spill mask, unused */
+ ArrayRef<const uint8_t>(stack_map));
+}
+
+
+CompiledMethod* OptimizingCompiler::CompileBaseline(
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit) const {
+ CodeVectorAllocator allocator;
+ codegen->CompileBaseline(&allocator);
+
+ std::vector<uint8_t> mapping_table;
+ DefaultSrcMap src_mapping_table;
+ bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols();
+ codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr);
+ std::vector<uint8_t> vmap_table;
+ codegen->BuildVMapTable(&vmap_table);
+ std::vector<uint8_t> gc_map;
+ codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
+
+ compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline);
+ return CompiledMethod::SwapAllocCompiledMethod(compiler_driver,
+ codegen->GetInstructionSet(),
+ ArrayRef<const uint8_t>(allocator.GetMemory()),
+ codegen->GetFrameSize(),
+ codegen->GetCoreSpillMask(),
+ 0, /* FPR spill mask, unused */
+ &src_mapping_table,
+ AlignVectorSize(mapping_table),
+ AlignVectorSize(vmap_table),
+ AlignVectorSize(gc_map),
+ ArrayRef<const uint8_t>());
}
CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
@@ -239,7 +330,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
const DexFile& dex_file) const {
UNUSED(invoke_type);
compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation);
- InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
+ CompilerDriver* compiler_driver = GetCompilerDriver();
+ InstructionSet instruction_set = compiler_driver->GetInstructionSet();
// Always use the thumb2 assembler: some runtime functionality (like implicit stack
// overflow checks) assume thumb2.
if (instruction_set == kArm) {
@@ -260,7 +352,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
DexCompilationUnit dex_compilation_unit(
nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
class_def_idx, method_idx, access_flags,
- GetCompilerDriver()->GetVerifiedMethod(&dex_file, method_idx));
+ compiler_driver->GetVerifiedMethod(&dex_file, method_idx));
std::string method_name = PrettyMethod(method_idx, dex_file);
@@ -275,7 +367,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
&dex_compilation_unit,
&dex_compilation_unit,
&dex_file,
- GetCompilerDriver(),
+ compiler_driver,
&compilation_stats_);
VLOG(compiler) << "Building " << PrettyMethod(method_idx, dex_file);
@@ -285,21 +377,24 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
return nullptr;
}
- CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set);
- if (codegen == nullptr) {
+ std::unique_ptr<CodeGenerator> codegen(
+ CodeGenerator::Create(graph,
+ instruction_set,
+ *compiler_driver->GetInstructionSetFeatures(),
+ compiler_driver->GetCompilerOptions()));
+ if (codegen.get() == nullptr) {
CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler";
compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
return nullptr;
}
HGraphVisualizer visualizer(
- visualizer_output_.get(), graph, kStringFilter, *codegen, method_name.c_str());
+ visualizer_output_.get(), graph, kStringFilter, *codegen.get(), method_name.c_str());
visualizer.DumpGraph("builder");
- CodeVectorAllocator allocator;
-
bool can_optimize = CanOptimize(*code_item);
bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set);
+ CompiledMethod* result = nullptr;
if (run_optimizations_ && can_optimize && can_allocate_registers) {
VLOG(compiler) << "Optimizing " << PrettyMethod(method_idx, dex_file);
if (!graph->TryBuildingSsa()) {
@@ -308,33 +403,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
<< ": it contains a non natural loop";
// We could not transform the graph to SSA, bailout.
compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
- return nullptr;
+ } else {
+ result = CompileOptimized(graph, codegen.get(), compiler_driver, dex_compilation_unit, visualizer);
}
- RunOptimizations(
- graph, GetCompilerDriver(), &compilation_stats_, dex_compilation_unit, visualizer);
-
- PrepareForRegisterAllocation(graph).Run();
- SsaLivenessAnalysis liveness(*graph, codegen);
- liveness.Analyze();
- visualizer.DumpGraph(kLivenessPassName);
-
- RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
- register_allocator.AllocateRegisters();
-
- visualizer.DumpGraph(kRegisterAllocatorPassName);
- codegen->CompileOptimized(&allocator);
-
- std::vector<uint8_t> stack_map;
- codegen->BuildStackMaps(&stack_map);
-
- compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);
- return new CompiledMethod(GetCompilerDriver(),
- instruction_set,
- allocator.GetMemory(),
- codegen->GetFrameSize(),
- codegen->GetCoreSpillMask(),
- 0, /* FPR spill mask, unused */
- stack_map);
} else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) {
LOG(FATAL) << "Could not allocate registers in optimizing compiler";
UNREACHABLE();
@@ -349,31 +420,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
}
- codegen->CompileBaseline(&allocator);
-
- std::vector<uint8_t> mapping_table;
- SrcMap src_mapping_table;
- codegen->BuildMappingTable(&mapping_table,
- GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ?
- &src_mapping_table : nullptr);
- std::vector<uint8_t> vmap_table;
- codegen->BuildVMapTable(&vmap_table);
- std::vector<uint8_t> gc_map;
- codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
-
- compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline);
- return new CompiledMethod(GetCompilerDriver(),
- instruction_set,
- allocator.GetMemory(),
- codegen->GetFrameSize(),
- codegen->GetCoreSpillMask(),
- 0, /* FPR spill mask, unused */
- &src_mapping_table,
- AlignVectorSize(mapping_table),
- AlignVectorSize(vmap_table),
- AlignVectorSize(gc_map),
- nullptr);
+ result = CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit);
}
+ return result;
}
Compiler* CreateOptimizingCompiler(CompilerDriver* driver) {
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 7993b19850..cc2723df99 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,6 @@ enum MethodCompilationStat {
kNotCompiledUnresolvedMethod,
kNotCompiledUnresolvedField,
kNotCompiledNonSequentialRegPair,
- kNotCompiledVolatile,
kNotOptimizedTryCatch,
kNotOptimizedDisabled,
kNotCompiledCantAccesType,
@@ -92,7 +91,6 @@ class OptimizingCompilerStats {
case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair";
- case kNotCompiledVolatile : return "kNotCompiledVolatile";
case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 04b56345c4..b3eb1e2d51 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -45,8 +45,12 @@ namespace art {
LiveInterval* BuildInterval(const size_t ranges[][2],
size_t number_of_ranges,
ArenaAllocator* allocator,
- int reg = -1) {
- LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt);
+ int reg = -1,
+ HInstruction* defined_by = nullptr) {
+ LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt, defined_by);
+ if (defined_by != nullptr) {
+ defined_by->SetLiveInterval(interval);
+ }
for (size_t i = number_of_ranges; i > 0; --i) {
interval->AddRange(ranges[i - 1][0], ranges[i - 1][1]);
}
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 1e93ece2ef..debe466560 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -37,10 +37,12 @@ void ParallelMoveResolver::EmitNativeCode(HParallelMove* parallel_move) {
// Perform the moves with constant sources.
for (size_t i = 0; i < moves_.Size(); ++i) {
- const MoveOperands& move = *moves_.Get(i);
- if (!move.IsEliminated()) {
- DCHECK(move.GetSource().IsConstant());
+ MoveOperands* move = moves_.Get(i);
+ if (!move->IsEliminated()) {
+ DCHECK(move->GetSource().IsConstant());
EmitMove(i);
+ // Eliminate the move, in case following moves need a scratch register.
+ move->Eliminate();
}
}
@@ -55,6 +57,9 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
// unallocated, or the move was already eliminated).
for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
MoveOperands* move = parallel_move->MoveOperandsAt(i);
+ // The parallel move resolver algorithm does not work with register pairs.
+ DCHECK(!move->GetSource().IsPair());
+ DCHECK(!move->GetDestination().IsPair());
if (!move->IsRedundant()) {
moves_.Add(move);
}
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 309425ef4d..7ec1dd2deb 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -58,6 +58,9 @@ class ParallelMoveResolver : public ValueObject {
};
bool IsScratchLocation(Location loc);
+
+  // Allocate a scratch register for performing a move. The method tries to use
+  // a register that is the destination of a move that has not yet been emitted.
int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
// Emit a move.
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 62629bcd0c..28b5697bbd 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -26,16 +26,26 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
public:
explicit TestParallelMoveResolver(ArenaAllocator* allocator) : ParallelMoveResolver(allocator) {}
+ void Dump(Location location) {
+ if (location.IsConstant()) {
+ message_ << "C";
+ } else if (location.IsPair()) {
+ message_ << location.low() << "," << location.high();
+ } else {
+ message_ << location.reg();
+ }
+ }
+
virtual void EmitMove(size_t index) {
MoveOperands* move = moves_.Get(index);
if (!message_.str().empty()) {
message_ << " ";
}
- message_ << "("
- << move->GetSource().reg()
- << " -> "
- << move->GetDestination().reg()
- << ")";
+ message_ << "(";
+ Dump(move->GetSource());
+ message_ << " -> ";
+ Dump(move->GetDestination());
+ message_ << ")";
}
virtual void EmitSwap(size_t index) {
@@ -43,11 +53,11 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
if (!message_.str().empty()) {
message_ << " ";
}
- message_ << "("
- << move->GetSource().reg()
- << " <-> "
- << move->GetDestination().reg()
- << ")";
+ message_ << "(";
+ Dump(move->GetSource());
+ message_ << " <-> ";
+ Dump(move->GetDestination());
+ message_ << ")";
}
virtual void SpillScratch(int reg ATTRIBUTE_UNUSED) {}
@@ -69,10 +79,10 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator,
size_t number_of_moves) {
HParallelMove* moves = new (allocator) HParallelMove(allocator);
for (size_t i = 0; i < number_of_moves; ++i) {
- moves->AddMove(new (allocator) MoveOperands(
+ moves->AddMove(
Location::RegisterLocation(operands[i][0]),
Location::RegisterLocation(operands[i][1]),
- nullptr));
+ nullptr);
}
return moves;
}
@@ -116,16 +126,76 @@ TEST(ParallelMoveTest, Swap) {
{
TestParallelMoveResolver resolver(&allocator);
- static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}};
+ static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 0}};
resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1)", resolver.GetMessage().c_str());
+ ASSERT_STREQ("(4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2)", resolver.GetMessage().c_str());
+ }
+}
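
The corrected expectation encodes how the resolver breaks a closed cycle: n
moves forming one cycle need n - 1 swaps, each swap peeling one register off
the cycle. A standalone check that the emitted swap sequence really realizes
{0->1, 1->2, 2->3, 3->4, 4->0}:

    #include <cstdio>
    #include <numeric>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<int> regs(5);
      std::iota(regs.begin(), regs.end(), 0);  // regs[i] initially holds value i
      // The swaps in the order the resolver emits them:
      // (4 <-> 0) (3 <-> 4) (2 <-> 3) (1 <-> 2).
      const int swaps[][2] = {{4, 0}, {3, 4}, {2, 3}, {1, 2}};
      for (const auto& s : swaps) std::swap(regs[s[0]], regs[s[1]]);
      // Every src -> dst move of the cycle is now satisfied:
      // regs[1] == 0, regs[2] == 1, regs[3] == 2, regs[4] == 3, regs[0] == 4.
      for (int i = 0; i < 5; ++i) std::printf("r%d = %d\n", i, regs[i]);
      return 0;
    }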
+
+TEST(ParallelMoveTest, ConstantLast) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::ConstantLocation(new (&allocator) HIntConstant(0)),
+ Location::RegisterLocation(0),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(1),
+ Location::RegisterLocation(2),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str());
+}
+
+TEST(ParallelMoveTest, Pairs) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(4),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str());
}
{
TestParallelMoveResolver resolver(&allocator);
- static constexpr size_t moves[][2] = {{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 1}, {5, 4}};
- resolver.EmitNativeCode(BuildParallelMove(&allocator, moves, arraysize(moves)));
- ASSERT_STREQ("(4 <-> 1) (3 <-> 4) (2 <-> 3) (0 -> 1) (5 -> 4)", resolver.GetMessage().c_str());
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(4),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str());
+ }
+
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(0),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(2 <-> 0) (1 -> 3)", resolver.GetMessage().c_str());
}
}
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index c1c805dc56..e120bc681e 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -27,6 +27,12 @@ namespace art {
static constexpr size_t kMaxLifetimePosition = -1;
static constexpr size_t kDefaultNumberOfSpillSlots = 4;
+// For simplicity, we implement register pairs as (reg, reg + 1).
+// Note that this is a requirement for double registers on ARM, since we
+// allocate SRegisters.
+static int GetHighForLowRegister(int reg) { return reg + 1; }
+static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
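+
+// For example, an interval whose low half is assigned register 4 implicitly
+// claims register 5 as its high half, while register 5 itself can never start
+// a pair since IsLowRegister(5) is false. On ARM this matches the VFP overlay,
+// where D(n) aliases S(2n) and S(2n + 1).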
+
RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
CodeGenerator* codegen,
const SsaLivenessAnalysis& liveness)
@@ -50,7 +56,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
reserved_out_slots_(0),
- maximum_number_of_live_registers_(0) {
+ maximum_number_of_live_core_registers_(0),
+ maximum_number_of_live_fp_registers_(0) {
codegen->SetupBlockedRegisters();
physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
@@ -64,7 +71,10 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph,
if (!Supports(instruction_set)) {
return false;
}
- if (instruction_set == kArm64 || instruction_set == kX86_64) {
+ if (instruction_set == kArm64
+ || instruction_set == kX86_64
+ || instruction_set == kArm
+ || instruction_set == kThumb2) {
return true;
}
for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) {
@@ -72,10 +82,12 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph,
!it.Done();
it.Advance()) {
HInstruction* current = it.Current();
- if (current->GetType() == Primitive::kPrimLong ||
- current->GetType() == Primitive::kPrimFloat ||
- current->GetType() == Primitive::kPrimDouble) {
- return false;
+ if (instruction_set == kX86) {
+ if (current->GetType() == Primitive::kPrimLong ||
+ current->GetType() == Primitive::kPrimFloat ||
+ current->GetType() == Primitive::kPrimDouble) {
+ return false;
+ }
}
}
}
@@ -130,7 +142,7 @@ void RegisterAllocator::BlockRegister(Location location,
: physical_fp_register_intervals_.Get(reg);
Primitive::Type type = location.IsRegister()
? Primitive::kPrimInt
- : Primitive::kPrimDouble;
+ : Primitive::kPrimFloat;
if (interval == nullptr) {
interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
if (location.IsRegister()) {
@@ -173,9 +185,6 @@ void RegisterAllocator::AllocateRegistersInternal() {
}
LinearScan();
- size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_;
- maximum_number_of_live_registers_ = 0;
-
inactive_.Reset();
active_.Reset();
handled_.Reset();
@@ -195,7 +204,6 @@ void RegisterAllocator::AllocateRegistersInternal() {
}
}
LinearScan();
- maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers;
}
void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
@@ -226,6 +234,12 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
temp_intervals_.Add(interval);
interval->AddRange(position, position + 1);
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ interval->AddHighInterval(true);
+ LiveInterval* high = interval->GetHighInterval();
+ temp_intervals_.Add(high);
+ unhandled_fp_intervals_.Add(high);
+ }
unhandled_fp_intervals_.Add(interval);
break;
}
@@ -279,6 +293,9 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
Location input = locations->InAt(i);
if (input.IsRegister() || input.IsFpuRegister()) {
BlockRegister(input, position, position + 1);
+ } else if (input.IsPair()) {
+ BlockRegister(input.ToLow(), position, position + 1);
+ BlockRegister(input.ToHigh(), position, position + 1);
}
}
@@ -291,6 +308,10 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek()));
+ if (codegen_->NeedsTwoRegisters(current->GetType())) {
+ current->AddHighInterval();
+ }
+
// Some instructions define their output in fixed register/stack slot. We need
// to ensure we know these locations before doing register allocation. For a
// given register, we create an interval that covers these locations. The register
@@ -304,14 +325,30 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
if (first.IsRegister() || first.IsFpuRegister()) {
current->SetFrom(position + 1);
current->SetRegister(first.reg());
+ } else if (first.IsPair()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(first.low());
+ LiveInterval* high = current->GetHighInterval();
+ high->SetRegister(first.high());
+ high->SetFrom(position + 1);
}
} else if (output.IsRegister() || output.IsFpuRegister()) {
// Shift the interval's start by one to account for the blocked register.
current->SetFrom(position + 1);
current->SetRegister(output.reg());
BlockRegister(output, position, position + 1);
+ } else if (output.IsPair()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(output.low());
+ LiveInterval* high = current->GetHighInterval();
+ high->SetRegister(output.high());
+ high->SetFrom(position + 1);
+ BlockRegister(output.ToLow(), position, position + 1);
+ BlockRegister(output.ToHigh(), position, position + 1);
} else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
current->SetSpillSlot(output.GetStackIndex());
+ } else {
+ DCHECK(output.IsUnallocated() || output.IsConstant());
}
// If needed, add interval to the list of unhandled intervals.
@@ -516,6 +553,7 @@ void RegisterAllocator::LinearScan() {
LiveInterval* current = unhandled_->Pop();
DCHECK(!current->IsFixed() && !current->HasSpillSlot());
DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart());
+ DCHECK(!current->IsLowInterval() || unhandled_->Peek()->IsHighInterval());
size_t position = current->GetStart();
@@ -560,12 +598,24 @@ void RegisterAllocator::LinearScan() {
if (current->IsSlowPathSafepoint()) {
// Synthesized interval to record the maximum number of live registers
// at safepoints. No need to allocate a register for it.
- maximum_number_of_live_registers_ =
- std::max(maximum_number_of_live_registers_, active_.Size());
+ if (processing_core_registers_) {
+ maximum_number_of_live_core_registers_ =
+ std::max(maximum_number_of_live_core_registers_, active_.Size());
+ } else {
+ maximum_number_of_live_fp_registers_ =
+ std::max(maximum_number_of_live_fp_registers_, active_.Size());
+ }
DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart());
continue;
}
+ if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
+ DCHECK(!current->HasRegister());
+      // Allocating the low part was unsuccessful. The split interval for the high part
+      // will be handled next (it is in the `unhandled_` list).
+ continue;
+ }
+
// (4) Try to find an available register.
bool success = TryAllocateFreeReg(current);
@@ -578,6 +628,9 @@ void RegisterAllocator::LinearScan() {
// intervals.
if (success) {
active_.Add(current);
+ if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
+ current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
+ }
}
}
}
@@ -626,30 +679,35 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
}
}
- int reg = -1;
+ int reg = kNoRegister;
if (current->HasRegister()) {
// Some instructions have a fixed register output.
reg = current->GetRegister();
- DCHECK_NE(free_until[reg], 0u);
+ if (free_until[reg] == 0) {
+ DCHECK(current->IsHighInterval());
+ // AllocateBlockedReg will spill the holder of the register.
+ return false;
+ }
} else {
+ DCHECK(!current->IsHighInterval());
int hint = current->FindFirstRegisterHint(free_until);
if (hint != kNoRegister) {
DCHECK(!IsBlocked(hint));
reg = hint;
+ } else if (current->IsLowInterval()) {
+ reg = FindAvailableRegisterPair(free_until, current->GetStart());
} else {
- // Pick the register that is free the longest.
- for (size_t i = 0; i < number_of_registers_; ++i) {
- if (IsBlocked(i)) continue;
- if (reg == -1 || free_until[i] > free_until[reg]) {
- reg = i;
- if (free_until[i] == kMaxLifetimePosition) break;
- }
- }
+ reg = FindAvailableRegister(free_until);
}
}
+ DCHECK_NE(reg, kNoRegister);
// If we could not find a register, we need to spill.
- if (reg == -1 || free_until[reg] == 0) {
+ if (free_until[reg] == 0) {
+ return false;
+ }
+
+ if (current->IsLowInterval() && free_until[GetHighForLowRegister(reg)] == 0) {
return false;
}
@@ -671,6 +729,66 @@ bool RegisterAllocator::IsBlocked(int reg) const {
: blocked_fp_registers_[reg];
}
+int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
+ int reg = kNoRegister;
+  // Pick the register pair whose next use is furthest away.
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ if (IsBlocked(i)) continue;
+ if (!IsLowRegister(i)) continue;
+ int high_register = GetHighForLowRegister(i);
+ if (IsBlocked(high_register)) continue;
+ int existing_high_register = GetHighForLowRegister(reg);
+ if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
+ && next_use[high_register] >= next_use[existing_high_register])) {
+ reg = i;
+ if (next_use[i] == kMaxLifetimePosition
+ && next_use[high_register] == kMaxLifetimePosition) {
+ break;
+ }
+ } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
+      // If one of the currently picked registers is known to be unavailable, just
+      // unconditionally try a new one.
+ reg = i;
+ }
+ }
+ return reg;
+}
+
+int RegisterAllocator::FindAvailableRegister(size_t* next_use) const {
+ int reg = kNoRegister;
+  // Pick the register whose next use is furthest away.
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ if (IsBlocked(i)) continue;
+ if (reg == kNoRegister || next_use[i] > next_use[reg]) {
+ reg = i;
+ if (next_use[i] == kMaxLifetimePosition) break;
+ }
+ }
+ return reg;
+}
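
Both helpers implement the classic linear-scan eviction heuristic: among the
candidate registers, prefer the one whose current occupant is needed again the
latest. A standalone sketch of the single-register case (PickFurthest and the
`blocked` array are illustrative):

    #include <cstddef>
    #include <limits>

    static constexpr size_t kMaxLifetimePosition = std::numeric_limits<size_t>::max();

    // Among non-blocked registers, pick the one whose next use is furthest
    // away; a register that is never used again (kMaxLifetimePosition) wins
    // outright.
    int PickFurthest(const size_t* next_use, size_t count, const bool* blocked) {
      int reg = -1;
      for (size_t i = 0; i < count; ++i) {
        if (blocked[i]) continue;
        if (reg == -1 || next_use[i] > next_use[reg]) {
          reg = static_cast<int>(i);
          if (next_use[i] == kMaxLifetimePosition) break;
        }
      }
      return reg;
    }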
+
+bool RegisterAllocator::TrySplitNonPairIntervalAt(size_t position,
+ size_t first_register_use,
+ size_t* next_use) {
+ for (size_t i = 0, e = active_.Size(); i < e; ++i) {
+ LiveInterval* active = active_.Get(i);
+ DCHECK(active->HasRegister());
+ // Split the first interval found.
+ if (first_register_use <= next_use[active->GetRegister()]
+ && !active->IsLowInterval()
+ && !active->IsHighInterval()) {
+ LiveInterval* split = Split(active, position);
+ active_.DeleteAt(i);
+ if (split != active) {
+ handled_.Add(active);
+ }
+ AddSorted(unhandled_, split);
+ return true;
+ }
+ }
+ return false;
+}
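
Concretely: if a pair needs two adjacent registers at its definition but one of
them is held by a single-register interval whose next use is not before the
pair's first register use, splitting that interval at the allocation point
frees its register without spilling the pair itself.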
+
// Find the register that is used the last, and spill the interval
// that holds it. If the first register use of `current` is at or after that
// register's next use, we spill `current` instead.
@@ -731,24 +849,50 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
}
}
- // Pick the register that is used the last.
- int reg = -1;
- for (size_t i = 0; i < number_of_registers_; ++i) {
- if (IsBlocked(i)) continue;
- if (reg == -1 || next_use[i] > next_use[reg]) {
- reg = i;
- if (next_use[i] == kMaxLifetimePosition) break;
- }
+ int reg = kNoRegister;
+ bool should_spill = false;
+ if (current->HasRegister()) {
+ DCHECK(current->IsHighInterval());
+ reg = current->GetRegister();
+ // When allocating the low part, we made sure the high register was available.
+ DCHECK_LT(first_register_use, next_use[reg]);
+ } else if (current->IsLowInterval()) {
+ reg = FindAvailableRegisterPair(next_use, current->GetStart());
+    // We should spill if either register is not available.
+ should_spill = (first_register_use >= next_use[reg])
+ || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
+ } else {
+ DCHECK(!current->IsHighInterval());
+ reg = FindAvailableRegister(next_use);
+ should_spill = (first_register_use >= next_use[reg]);
}
- if (first_register_use >= next_use[reg]) {
- // If the first use of that instruction is after the last use of the found
- // register, we split this interval just before its first register use.
- AllocateSpillSlotFor(current);
- LiveInterval* split = Split(current, first_register_use - 1);
- DCHECK_NE(current, split) << "There is not enough registers available for "
- << split->GetParent()->GetDefinedBy()->DebugName();
- AddSorted(unhandled_, split);
+ DCHECK_NE(reg, kNoRegister);
+ if (should_spill) {
+ DCHECK(!current->IsHighInterval());
+ bool is_allocation_at_use_site = (current->GetStart() == (first_register_use - 1));
+ if (current->IsLowInterval()
+ && is_allocation_at_use_site
+ && TrySplitNonPairIntervalAt(current->GetStart(), first_register_use, next_use)) {
+ // If we're allocating a register for `current` because the instruction at
+ // that position requires it, but we think we should spill, then there are
+ // non-pair intervals blocking the allocation. We split the first
+ // interval found, and put ourselves first in the `unhandled_` list.
+ LiveInterval* existing = unhandled_->Peek();
+ DCHECK(existing->IsHighInterval());
+ DCHECK_EQ(existing->GetLowInterval(), current);
+ unhandled_->Add(current);
+ } else {
+ // If the first use of that instruction is after the last use of the found
+ // register, we split this interval just before its first register use.
+ AllocateSpillSlotFor(current);
+ LiveInterval* split = Split(current, first_register_use - 1);
+ DCHECK_NE(current, split) << "There is not enough registers available for "
+ << split->GetParent()->GetDefinedBy()->DebugName() << " "
+ << split->GetParent()->GetDefinedBy()->GetId()
+ << " at " << first_register_use - 1;
+ AddSorted(unhandled_, split);
+ }
return false;
} else {
      // Use this register and spill the active and inactive intervals that
@@ -761,8 +905,27 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
DCHECK(!active->IsFixed());
LiveInterval* split = Split(active, current->GetStart());
active_.DeleteAt(i);
- handled_.Add(active);
+ if (split != active) {
+ handled_.Add(active);
+ }
AddSorted(unhandled_, split);
+
+ if (active->IsLowInterval() || active->IsHighInterval()) {
+ LiveInterval* other_half = active->IsLowInterval()
+ ? active->GetHighInterval()
+ : active->GetLowInterval();
+ // We also need to remove the other half from the list of actives.
+ bool found = false;
+ for (size_t j = 0; j < active_.Size(); ++j) {
+ if (active_.Get(j) == other_half) {
+ found = true;
+ active_.DeleteAt(j);
+ handled_.Add(other_half);
+ break;
+ }
+ }
+ DCHECK(found);
+ }
break;
}
}
@@ -782,14 +945,38 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
if (next_intersection != kNoLifetime) {
if (inactive->IsFixed()) {
LiveInterval* split = Split(current, next_intersection);
+ DCHECK_NE(split, current);
AddSorted(unhandled_, split);
} else {
- LiveInterval* split = Split(inactive, next_intersection);
+ // Split at the start of `current`, which will lead to splitting
+ // at the end of the lifetime hole of `inactive`.
+ LiveInterval* split = Split(inactive, current->GetStart());
+ // If it's inactive, it must start before the current interval.
+ DCHECK_NE(split, inactive);
inactive_.DeleteAt(i);
--i;
--e;
handled_.Add(inactive);
AddSorted(unhandled_, split);
+
+ if (inactive->IsLowInterval() || inactive->IsHighInterval()) {
+ LiveInterval* other_half = inactive->IsLowInterval()
+ ? inactive->GetHighInterval()
+ : inactive->GetLowInterval();
+
+ // We also need to remove the other half from the list of inactives.
+ bool found = false;
+ for (size_t j = 0; j < inactive_.Size(); ++j) {
+ if (inactive_.Get(j) == other_half) {
+ found = true;
+ inactive_.DeleteAt(j);
+ --e;
+ handled_.Add(other_half);
+ break;
+ }
+ }
+ DCHECK(found);
+ }
}
}
}
@@ -804,7 +991,8 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter
size_t insert_at = 0;
for (size_t i = array->Size(); i > 0; --i) {
LiveInterval* current = array->Get(i - 1);
- if (current->StartsAfter(interval)) {
+ // High intervals must be processed right after their low equivalent.
+ if (current->StartsAfter(interval) && !current->IsHighInterval()) {
insert_at = i;
break;
} else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
@@ -815,23 +1003,49 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter
break;
}
}
+
array->InsertAt(insert_at, interval);
+  // Insert the high interval before the low, to ensure the low is processed first.
+ if (interval->HasHighInterval()) {
+ array->InsertAt(insert_at, interval->GetHighInterval());
+ } else if (interval->HasLowInterval()) {
+ array->InsertAt(insert_at + 1, interval->GetLowInterval());
+ }
}
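
Because `unhandled_` is popped from the back, placing the high half at
`insert_at` (which shifts the low half to `insert_at + 1`) guarantees the low
half is always popped first; once it receives a register, LinearScan assigns
reg + 1 to the high half, and the `unhandled_->Peek()` DCHECK at the top of
LinearScan depends on the two halves staying adjacent.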
LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
- DCHECK(position >= interval->GetStart());
+ DCHECK_GE(position, interval->GetStart());
DCHECK(!interval->IsDeadAt(position));
if (position == interval->GetStart()) {
// Spill slot will be allocated when handling `interval` again.
interval->ClearRegister();
+ if (interval->HasHighInterval()) {
+ interval->GetHighInterval()->ClearRegister();
+ } else if (interval->HasLowInterval()) {
+ interval->GetLowInterval()->ClearRegister();
+ }
return interval;
} else {
LiveInterval* new_interval = interval->SplitAt(position);
+ if (interval->HasHighInterval()) {
+ LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
+ new_interval->SetHighInterval(high);
+ high->SetLowInterval(new_interval);
+ } else if (interval->HasLowInterval()) {
+ LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
+ new_interval->SetLowInterval(low);
+ low->SetHighInterval(new_interval);
+ }
return new_interval;
}
}
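
Splitting one half of a pair must split the other half at the same position and
cross-link the two tails, or later phases would see a low sibling without a
matching high. A minimal structural sketch, with Interval as an illustrative
stand-in for the ART LiveInterval:

    #include <cstddef>

    struct Interval {
      Interval* high = nullptr;  // non-null iff this is a low half
      Interval* low = nullptr;   // non-null iff this is a high half
      // Stand-in for LiveInterval::SplitAt: returns the new tail interval.
      Interval* SplitAt(size_t position) { (void)position; return new Interval(); }
    };

    // Mirrors the pairing logic of RegisterAllocator::Split above.
    Interval* SplitKeepingPairs(Interval* interval, size_t position) {
      Interval* tail = interval->SplitAt(position);
      if (interval->high != nullptr) {
        Interval* high_tail = interval->high->SplitAt(position);
        tail->high = high_tail;
        high_tail->low = tail;
      } else if (interval->low != nullptr) {
        Interval* low_tail = interval->low->SplitAt(position);
        tail->low = low_tail;
        low_tail->high = tail;
      }
      return tail;
    }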
void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
+ if (interval->IsHighInterval()) {
+ // The low interval will contain the spill slot.
+ return;
+ }
+
LiveInterval* parent = interval->GetParent();
// An instruction gets a spill slot for its entire lifetime. If the parent
@@ -897,7 +1111,9 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
static bool IsValidDestination(Location destination) {
return destination.IsRegister()
+ || destination.IsRegisterPair()
|| destination.IsFpuRegister()
+ || destination.IsFpuRegisterPair()
|| destination.IsStackSlot()
|| destination.IsDoubleStackSlot();
}
@@ -905,7 +1121,6 @@ static bool IsValidDestination(Location destination) {
void RegisterAllocator::AddInputMoveFor(HInstruction* user,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
if (source.Equals(destination)) return;
DCHECK(!user->IsPhi());
@@ -922,7 +1137,7 @@ void RegisterAllocator::AddInputMoveFor(HInstruction* user,
move = previous->AsParallelMove();
}
DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
- move->AddMove(new (allocator_) MoveOperands(source, destination, nullptr));
+ move->AddMove(source, destination, nullptr);
}
static bool IsInstructionStart(size_t position) {
@@ -937,7 +1152,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position,
HInstruction* instruction,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
+ DCHECK(IsValidDestination(destination)) << destination;
if (source.Equals(destination)) return;
HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
@@ -994,14 +1209,14 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position,
}
}
DCHECK_EQ(move->GetLifetimePosition(), position);
- move->AddMove(new (allocator_) MoveOperands(source, destination, instruction));
+ move->AddMove(source, destination, instruction);
}
void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
HInstruction* instruction,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
+ DCHECK(IsValidDestination(destination)) << destination;
if (source.Equals(destination)) return;
DCHECK_EQ(block->GetSuccessors().Size(), 1u);
@@ -1024,14 +1239,14 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
} else {
move = previous->AsParallelMove();
}
- move->AddMove(new (allocator_) MoveOperands(source, destination, instruction));
+ move->AddMove(source, destination, instruction);
}
void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
HInstruction* instruction,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
+ DCHECK(IsValidDestination(destination)) << destination;
if (source.Equals(destination)) return;
HInstruction* first = block->GetFirstInstruction();
@@ -1043,13 +1258,13 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
move->SetLifetimePosition(block->GetLifetimeStart());
block->InsertInstructionBefore(move, first);
}
- move->AddMove(new (allocator_) MoveOperands(source, destination, instruction));
+ move->AddMove(source, destination, instruction);
}
void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
+ DCHECK(IsValidDestination(destination)) << destination;
if (source.Equals(destination)) return;
if (instruction->IsPhi()) {
@@ -1067,7 +1282,7 @@ void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
move->SetLifetimePosition(position);
instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
}
- move->AddMove(new (allocator_) MoveOperands(source, destination, instruction));
+ move->AddMove(source, destination, instruction);
}
void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
@@ -1075,9 +1290,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
if (current->HasSpillSlot() && current->HasRegister()) {
// We spill eagerly, so move must be at definition.
InsertMoveAfter(interval->GetDefinedBy(),
- interval->IsFloatingPoint()
- ? Location::FpuRegisterLocation(interval->GetRegister())
- : Location::RegisterLocation(interval->GetRegister()),
+ interval->ToLocation(),
interval->NeedsTwoSpillSlots()
? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
: Location::StackSlot(interval->GetParent()->GetSpillSlot()));
@@ -1097,10 +1310,17 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
locations->SetEnvironmentAt(use->GetInputIndex(), source);
} else {
Location expected_location = locations->InAt(use->GetInputIndex());
- if (expected_location.IsUnallocated()) {
- locations->SetInAt(use->GetInputIndex(), source);
- } else if (!expected_location.IsConstant()) {
- AddInputMoveFor(use->GetUser(), source, expected_location);
+ // The expected (actual) location may be invalid in case the input is unused. Currently
+ // this only happens for intrinsics.
+ if (expected_location.IsValid()) {
+ if (expected_location.IsUnallocated()) {
+ locations->SetInAt(use->GetInputIndex(), source);
+ } else if (!expected_location.IsConstant()) {
+ AddInputMoveFor(use->GetUser(), source, expected_location);
+ }
+ } else {
+ DCHECK(use->GetUser()->IsInvoke());
+ DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
}
}
use = use->GetNext();
@@ -1137,8 +1357,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
switch (source.GetKind()) {
case Location::kRegister: {
locations->AddLiveRegister(source);
- DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_);
-
+ DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+ maximum_number_of_live_core_registers_ +
+ maximum_number_of_live_fp_registers_);
if (current->GetType() == Primitive::kPrimNot) {
locations->SetRegisterBit(source.reg());
}
@@ -1148,6 +1369,13 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
locations->AddLiveRegister(source);
break;
}
+
+ case Location::kRegisterPair:
+ case Location::kFpuRegisterPair: {
+ locations->AddLiveRegister(source.ToLow());
+ locations->AddLiveRegister(source.ToHigh());
+ break;
+ }
case Location::kStackSlot: // Fall-through
case Location::kDoubleStackSlot: // Fall-through
case Location::kConstant: {
@@ -1226,7 +1454,8 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
void RegisterAllocator::Resolve() {
codegen_->ComputeFrameSize(
- spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
+ spill_slots_.Size(), maximum_number_of_live_core_registers_,
+ maximum_number_of_live_fp_registers_, reserved_out_slots_);
// Adjust the Out Location of instructions.
// TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
@@ -1307,6 +1536,10 @@ void RegisterAllocator::Resolve() {
size_t temp_index = 0;
for (size_t i = 0; i < temp_intervals_.Size(); ++i) {
LiveInterval* temp = temp_intervals_.Get(i);
+ if (temp->IsHighInterval()) {
+      // High intervals can be skipped; they are already handled by the low interval.
+ continue;
+ }
HInstruction* at = liveness_.GetTempUser(temp);
if (at != current) {
temp_index = 0;
@@ -1320,14 +1553,14 @@ void RegisterAllocator::Resolve() {
break;
case Primitive::kPrimDouble:
- // TODO: Support the case of ARM, where a double value
- // requires an FPU register pair (note that the ARM back end
- // does not yet use this register allocator when a method uses
- // floats or doubles).
- DCHECK(codegen_->GetInstructionSet() != kArm
- && codegen_->GetInstructionSet() != kThumb2);
- locations->SetTempAt(
- temp_index++, Location::FpuRegisterLocation(temp->GetRegister()));
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ Location location = Location::FpuRegisterPairLocation(
+ temp->GetRegister(), temp->GetHighInterval()->GetRegister());
+ locations->SetTempAt(temp_index++, location);
+ } else {
+ locations->SetTempAt(
+ temp_index++, Location::FpuRegisterLocation(temp->GetRegister()));
+ }
break;
default:
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index cbe741c2b3..b8f70bdc18 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -128,6 +128,12 @@ class RegisterAllocator {
bool ValidateInternal(bool log_fatal_on_failure) const;
void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
void DumpAllIntervals(std::ostream& stream) const;
+ int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
+ int FindAvailableRegister(size_t* next_use) const;
+
+ // Try splitting an active non-pair interval at the given `position`.
+ // Returns whether it was successful at finding such an interval.
+ bool TrySplitNonPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use);
ArenaAllocator* const allocator_;
CodeGenerator* const codegen_;
@@ -188,10 +194,14 @@ class RegisterAllocator {
// Slots reserved for out arguments.
size_t reserved_out_slots_;
- // The maximum live registers at safepoints.
- size_t maximum_number_of_live_registers_;
+ // The maximum live core registers at safepoints.
+ size_t maximum_number_of_live_core_registers_;
+
+ // The maximum live FP registers at safepoints.
+ size_t maximum_number_of_live_fp_registers_;
ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
+ ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
};
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index f677e840ef..cb5010afcd 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -19,6 +19,7 @@
#include "code_generator_x86.h"
#include "dex_file.h"
#include "dex_instruction.h"
+#include "driver/compiler_options.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "register_allocator.h"
@@ -40,7 +41,7 @@ static bool Check(const uint16_t* data) {
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
HGraph* graph = builder.BuildGraph(*item);
graph->TryBuildingSsa();
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -56,7 +57,7 @@ TEST(RegisterAllocatorTest, ValidateIntervals) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = new (&allocator) HGraph(&allocator);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
GrowableArray<LiveInterval*> intervals(&allocator, 0);
// Test with two intervals of the same range.
@@ -295,7 +296,7 @@ TEST(RegisterAllocatorTest, Loop3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -327,7 +328,7 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -380,7 +381,7 @@ TEST(RegisterAllocatorTest, DeadPhi) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
SsaDeadPhiElimination(graph).Run();
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -402,7 +403,7 @@ TEST(RegisterAllocatorTest, FreeUntil) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
SsaDeadPhiElimination(graph).Run();
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -462,7 +463,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
entry->AddSuccessor(block);
HInstruction* test = new (allocator) HInstanceFieldGet(
- parameter, Primitive::kPrimBoolean, MemberOffset(22));
+ parameter, Primitive::kPrimBoolean, MemberOffset(22), false);
block->AddInstruction(test);
block->AddInstruction(new (allocator) HIf(test));
HBasicBlock* then = new (allocator) HBasicBlock(graph);
@@ -481,8 +482,10 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
*phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
join->AddPhi(*phi);
- *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42));
- *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42));
+ *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt,
+ MemberOffset(42), false);
+ *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt,
+ MemberOffset(42), false);
then->AddInstruction(*input1);
else_->AddInstruction(*input2);
join->AddInstruction(new (allocator) HExit());
@@ -502,7 +505,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -517,7 +520,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -534,7 +537,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -551,7 +554,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -581,7 +584,8 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
graph->AddBlock(block);
entry->AddSuccessor(block);
- *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42));
+ *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt,
+ MemberOffset(42), false);
block->AddInstruction(*field);
*ret = new (allocator) HReturn(*field);
block->AddInstruction(*ret);
@@ -600,7 +604,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
{
HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -613,7 +617,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
{
HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -662,7 +666,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
{
HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -676,7 +680,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
{
HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -723,7 +727,7 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
{
HGraph* graph = BuildDiv(&allocator, &div);
- x86::CodeGeneratorX86 codegen(graph);
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -735,4 +739,106 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
}
}
+// Test a bug in the register allocator, where allocating a blocked
+// register would lead to spilling an inactive interval at the wrong
+// position.
+TEST(RegisterAllocatorTest, SpillInactive) {
+ ArenaPool pool;
+
+ // Create a synthesized graph to please the register_allocator and
+ // ssa_liveness_analysis code.
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = new (&allocator) HGraph(&allocator);
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* one = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+ HInstruction* two = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+ HInstruction* three = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+ HInstruction* four = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+ entry->AddInstruction(one);
+ entry->AddInstruction(two);
+ entry->AddInstruction(three);
+ entry->AddInstruction(four);
+
+ HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+ entry->AddSuccessor(block);
+ block->AddInstruction(new (&allocator) HExit());
+
+ // We create a synthesized user requesting a register, to avoid just spilling the
+ // intervals.
+ HPhi* user = new (&allocator) HPhi(&allocator, 0, 1, Primitive::kPrimInt);
+ user->AddInput(one);
+ user->SetBlock(block);
+ LocationSummary* locations = new (&allocator) LocationSummary(user, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ static constexpr size_t phi_ranges[][2] = {{20, 30}};
+ BuildInterval(phi_ranges, arraysize(phi_ranges), &allocator, -1, user);
+
+ // Create an interval with lifetime holes.
+ static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}};
+ LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one);
+  first->first_use_ = new (&allocator) UsePosition(user, 0, false, 8, first->first_use_);
+  first->first_use_ = new (&allocator) UsePosition(user, 0, false, 7, first->first_use_);
+  first->first_use_ = new (&allocator) UsePosition(user, 0, false, 6, first->first_use_);
+
+ locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+ first = first->SplitAt(1);
+
+  // Create an interval that conflicts with the next one, forcing that interval to
+  // call `AllocateBlockedReg`.
+ static constexpr size_t ranges2[][2] = {{2, 4}};
+ LiveInterval* second = BuildInterval(ranges2, arraysize(ranges2), &allocator, -1, two);
+ locations = new (&allocator) LocationSummary(second->GetDefinedBy(), LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+
+  // Create an interval that will lead to splitting the first interval. The bug occurred
+  // when we split at the wrong position, in this case at the next intersection between
+  // this interval and the first interval. We would then have put the interval with ranges
+  // "[0, 2), [4, 6)" in the list of handled intervals, even though we had not yet processed
+  // intervals before lifetime position 6.
+ static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}};
+ LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three);
+  third->first_use_ = new (&allocator) UsePosition(user, 0, false, 8, third->first_use_);
+  third->first_use_ = new (&allocator) UsePosition(user, 0, false, 4, third->first_use_);
+  third->first_use_ = new (&allocator) UsePosition(user, 0, false, 3, third->first_use_);
+ locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+ third = third->SplitAt(3);
+
+  // Because the first part of the split interval was considered handled, this interval
+  // was free to be allocated the same register, even though the two conflict.
+ static constexpr size_t ranges4[][2] = {{4, 6}};
+ LiveInterval* fourth = BuildInterval(ranges4, arraysize(ranges4), &allocator, -1, four);
+ locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+
+ x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ SsaLivenessAnalysis liveness(*graph, &codegen);
+
+ RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+ register_allocator.unhandled_core_intervals_.Add(fourth);
+ register_allocator.unhandled_core_intervals_.Add(third);
+ register_allocator.unhandled_core_intervals_.Add(second);
+ register_allocator.unhandled_core_intervals_.Add(first);
+
+  // Make just one register available so that all the intervals compete for it.
+ register_allocator.number_of_registers_ = 1;
+ register_allocator.registers_array_ = allocator.AllocArray<size_t>(1);
+ register_allocator.processing_core_registers_ = true;
+ register_allocator.unhandled_ = &register_allocator.unhandled_core_intervals_;
+ register_allocator.LinearScan();
+
+  // Check that there are no conflicts between the intervals.
+ GrowableArray<LiveInterval*> intervals(&allocator, 0);
+ intervals.Add(first);
+ intervals.Add(second);
+ intervals.Add(third);
+ intervals.Add(fourth);
+ ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
+ intervals, 0, 0, codegen, &allocator, true, false));
+}
+
} // namespace art
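
For context on what SpillInactive exercises: linear scan keeps every interval in exactly one of the unhandled, active, inactive, and handled sets, and an interval may move to handled only once the scan position is past its end. The sketch below models just that bookkeeping in isolation; the names (Interval, AdvanceTo) are invented for illustration, and this is a simplified model, not ART's implementation.

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Minimal model of a live interval with lifetime holes: a sorted
    // list of [start, end) ranges.
    struct Interval {
      std::vector<std::pair<size_t, size_t>> ranges;
      size_t End() const { return ranges.back().second; }
      bool Covers(size_t position) const {
        for (const auto& range : ranges) {
          if (position >= range.first && position < range.second) return true;
        }
        return false;
      }
    };

    // Per-position bookkeeping of classic linear scan. The invariant the
    // SpillInactive test guards: an interval moves to `handled` only once
    // the scan position is past its end. Moving a split interval there too
    // early lets a conflicting interval believe its register is free.
    void AdvanceTo(size_t position,
                   std::vector<Interval*>* active,
                   std::vector<Interval*>* inactive,
                   std::vector<Interval*>* handled) {
      auto sweep = [&](std::vector<Interval*>* from, std::vector<Interval*>* to,
                       bool move_when_covered) {
        for (size_t i = 0; i < from->size();) {
          Interval* interval = (*from)[i];
          if (interval->End() <= position) {
            handled->push_back(interval);  // expired: the scan is past its end
            from->erase(from->begin() + i);
          } else if (interval->Covers(position) == move_when_covered) {
            to->push_back(interval);       // crossed a lifetime-hole boundary
            from->erase(from->begin() + i);
          } else {
            ++i;
          }
        }
      };
      sweep(active, inactive, /* move_when_covered= */ false);
      sweep(inactive, active, /* move_when_covered= */ true);
    }
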
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 660a5c5f60..d41157b8d8 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -419,10 +419,21 @@ bool LiveInterval::NeedsTwoSpillSlots() const {
}
Location LiveInterval::ToLocation() const {
+ DCHECK(!IsHighInterval());
if (HasRegister()) {
- return IsFloatingPoint()
- ? Location::FpuRegisterLocation(GetRegister())
- : Location::RegisterLocation(GetRegister());
+ if (IsFloatingPoint()) {
+ if (HasHighInterval()) {
+ return Location::FpuRegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister());
+ } else {
+ return Location::FpuRegisterLocation(GetRegister());
+ }
+ } else {
+ if (HasHighInterval()) {
+ return Location::RegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister());
+ } else {
+ return Location::RegisterLocation(GetRegister());
+ }
+ }
} else {
HInstruction* defined_by = GetParent()->GetDefinedBy();
if (defined_by->IsConstant()) {
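
The rewritten register case of ToLocation() is a 2x2 decision: floating-point versus core, crossed with paired versus single. Factored into a helper it reads as below; the helper name is invented for this sketch, while the four Location factory functions are exactly the ones used in the hunk above (from locations.h).

    // Sketch only: the register case of ToLocation() with the 2x2 choice made
    // explicit. `low` and `high` are the registers allocated to the low and
    // (when paired) high siblings.
    static Location RegisterLocationFor(bool is_fpu, bool is_pair, int low, int high) {
      if (is_fpu) {
        return is_pair ? Location::FpuRegisterPairLocation(low, high)
                       : Location::FpuRegisterLocation(low);
      }
      return is_pair ? Location::RegisterPairLocation(low, high)
                     : Location::RegisterLocation(low);
    }
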
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 23123891ef..a123313426 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -77,6 +77,15 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> {
stream << "[" << start_ << ", " << end_ << ")";
}
+ LiveRange* Dup(ArenaAllocator* allocator) const {
+ return new (allocator) LiveRange(
+ start_, end_, next_ == nullptr ? nullptr : next_->Dup(allocator));
+ }
+
+ LiveRange* GetLastRange() {
+ return next_ == nullptr ? this : next_->GetLastRange();
+ }
+
private:
size_t start_;
size_t end_;
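
Both LiveRange::Dup() and GetLastRange() recurse down the singly linked list, so their stack depth grows with the number of ranges; range lists are short in practice, which keeps this safe. For comparison, an iterative Dup() with constant stack depth would look like this (a drop-in sketch with the same signature and member access as the recursive version):

    // Iterative equivalent of LiveRange::Dup() (sketch; same semantics).
    // Appends a copy of each range to the tail of the new list.
    LiveRange* Dup(ArenaAllocator* allocator) const {
      LiveRange* head = nullptr;
      LiveRange** tail = &head;
      for (const LiveRange* range = this; range != nullptr; range = range->next_) {
        *tail = new (allocator) LiveRange(range->start_, range->end_, nullptr);
        tail = &(*tail)->next_;
      }
      return head;
    }
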
@@ -123,6 +132,12 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
stream << position_;
}
+ UsePosition* Dup(ArenaAllocator* allocator) const {
+ return new (allocator) UsePosition(
+ user_, input_index_, is_environment_, position_,
+ next_ == nullptr ? nullptr : next_->Dup(allocator));
+ }
+
private:
HInstruction* const user_;
const size_t input_index_;
@@ -414,7 +429,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
LiveRange* current = first_range_;
LiveRange* previous = nullptr;
// Iterate over the ranges, and either find a range that covers this position, or
- // a two ranges in between this position (that is, the position is in a lifetime hole).
+    // two ranges on either side of this position (that is, the position is in a lifetime hole).
do {
if (position >= current->GetEnd()) {
// Move to next range.
@@ -478,6 +493,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
}
stream << "}";
stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
+ stream << " is_high: " << IsHighInterval();
+ stream << " is_low: " << IsLowInterval();
}
LiveInterval* GetNextSibling() const { return next_sibling_; }
@@ -512,6 +529,58 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
// Returns whether `other` and `this` share the same kind of register.
bool SameRegisterKind(Location other) const;
+ bool HasHighInterval() const {
+ return IsLowInterval();
+ }
+
+ bool HasLowInterval() const {
+ return IsHighInterval();
+ }
+
+ LiveInterval* GetLowInterval() const {
+ DCHECK(HasLowInterval());
+ return high_or_low_interval_;
+ }
+
+ LiveInterval* GetHighInterval() const {
+ DCHECK(HasHighInterval());
+ return high_or_low_interval_;
+ }
+
+ bool IsHighInterval() const {
+ return GetParent()->is_high_interval_;
+ }
+
+ bool IsLowInterval() const {
+ return !IsHighInterval() && (GetParent()->high_or_low_interval_ != nullptr);
+ }
+
+ void SetLowInterval(LiveInterval* low) {
+ DCHECK(IsHighInterval());
+ high_or_low_interval_ = low;
+ }
+
+ void SetHighInterval(LiveInterval* high) {
+ DCHECK(IsLowInterval());
+ high_or_low_interval_ = high;
+ }
+
+ void AddHighInterval(bool is_temp = false) {
+ DCHECK_EQ(GetParent(), this);
+ DCHECK(!HasHighInterval());
+ DCHECK(!HasLowInterval());
+ high_or_low_interval_ = new (allocator_) LiveInterval(
+ allocator_, type_, defined_by_, false, kNoRegister, is_temp, false, true);
+ high_or_low_interval_->high_or_low_interval_ = this;
+ if (first_range_ != nullptr) {
+ high_or_low_interval_->first_range_ = first_range_->Dup(allocator_);
+      high_or_low_interval_->last_range_ = high_or_low_interval_->first_range_->GetLastRange();
+ }
+ if (first_use_ != nullptr) {
+ high_or_low_interval_->first_use_ = first_use_->Dup(allocator_);
+ }
+ }
+
private:
LiveInterval(ArenaAllocator* allocator,
Primitive::Type type,
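
A note on the pairing design introduced above: the flag lives only on the parent, so IsHighInterval() delegates through GetParent() and every sibling of a split interval answers consistently, while the two halves find each other through the shared high_or_low_interval_ link. A typical call sequence, assuming a parent interval for a wide value in the register allocator (the call site itself is not part of this diff):

    // Hypothetical usage sketch; `interval` is assumed to be the parent
    // interval of a core long value that needs two registers on a 32-bit
    // target.
    interval->AddHighInterval();
    LiveInterval* high = interval->GetHighInterval();

    // The siblings are linked symmetrically...
    DCHECK(interval->IsLowInterval());
    DCHECK(high->IsHighInterval());
    DCHECK_EQ(high->GetLowInterval(), interval);

    // ...and the high half starts out with duplicated ranges and uses, so
    // both halves are live over the same positions and are allocated
    // independently.
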
@@ -519,7 +588,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
bool is_fixed = false,
int reg = kNoRegister,
bool is_temp = false,
- bool is_slow_path_safepoint = false)
+ bool is_slow_path_safepoint = false,
+ bool is_high_interval = false)
: allocator_(allocator),
first_range_(nullptr),
last_range_(nullptr),
@@ -532,6 +602,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
is_fixed_(is_fixed),
is_temp_(is_temp),
is_slow_path_safepoint_(is_slow_path_safepoint),
+ is_high_interval_(is_high_interval),
+ high_or_low_interval_(nullptr),
defined_by_(defined_by) {}
ArenaAllocator* const allocator_;
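
With an eighth defaulted parameter, the boolean tail of this constructor is easy to misread at call sites. The call made by AddHighInterval() above, annotated with the parameter names (a readability sketch, not a proposed change):

    high_or_low_interval_ = new (allocator_) LiveInterval(
        allocator_,
        type_,
        defined_by_,
        /* is_fixed */ false,
        /* reg */ kNoRegister,
        /* is_temp */ is_temp,
        /* is_slow_path_safepoint */ false,
        /* is_high_interval */ true);
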
@@ -568,12 +640,21 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
// Whether the interval is for a safepoint that calls on slow path.
const bool is_slow_path_safepoint_;
+  // Whether this interval is a synthesized interval for a register pair.
+ const bool is_high_interval_;
+
+  // If this interval needs a register pair, the other half of the pair.
+  // `is_high_interval_` tells whether this field holds the low or the high one.
+ LiveInterval* high_or_low_interval_;
+
// The instruction represented by this interval.
HInstruction* const defined_by_;
static constexpr int kNoRegister = -1;
static constexpr int kNoSpillSlot = -1;
+ ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
+
DISALLOW_COPY_AND_ASSIGN(LiveInterval);
};
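
Taken together, the new flag and link admit a compact consistency check. Expressed as a hypothetical debug helper (not part of the patch; the DCHECK macros are the ones used throughout ART):

    // Hypothetical helper stating the pairing invariants this patch
    // introduces for a parent interval: the two roles are mutually
    // exclusive, and the low/high links point back at each other.
    static void CheckPairInvariants(LiveInterval* interval) {
      DCHECK_EQ(interval->GetParent(), interval);  // the links live on the parent
      DCHECK(!(interval->IsLowInterval() && interval->IsHighInterval()));
      if (interval->IsLowInterval()) {
        DCHECK_EQ(interval->GetHighInterval()->GetLowInterval(), interval);
      }
      if (interval->IsHighInterval()) {
        DCHECK_EQ(interval->GetLowInterval()->GetHighInterval(), interval);
      }
    }
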