Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator.cc  235
-rw-r--r--  compiler/optimizing/code_generator.h  67
-rw-r--r--  compiler/optimizing/code_generator_arm.cc  72
-rw-r--r--  compiler/optimizing/code_generator_arm.h  3
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  60
-rw-r--r--  compiler/optimizing/code_generator_arm64.h  5
-rw-r--r--  compiler/optimizing/code_generator_x86.cc  276
-rw-r--r--  compiler/optimizing/code_generator_x86.h  2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  36
-rw-r--r--  compiler/optimizing/constant_folding.cc  180
-rw-r--r--  compiler/optimizing/graph_checker.cc  6
-rw-r--r--  compiler/optimizing/gvn.cc  339
-rw-r--r--  compiler/optimizing/inliner.cc  13
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc  265
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc  5
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc  5
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc  5
-rw-r--r--  compiler/optimizing/liveness_test.cc  56
-rw-r--r--  compiler/optimizing/locations.h  22
-rw-r--r--  compiler/optimizing/nodes.cc  36
-rw-r--r--  compiler/optimizing/nodes.h  94
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc  112
-rw-r--r--  compiler/optimizing/primitive_type_propagation.cc  2
-rw-r--r--  compiler/optimizing/register_allocator.cc  126
-rw-r--r--  compiler/optimizing/register_allocator.h  24
-rw-r--r--  compiler/optimizing/ssa_builder.cc  231
-rw-r--r--  compiler/optimizing/ssa_builder.h  22
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc  5
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h  30
-rw-r--r--  compiler/optimizing/ssa_test.cc  19
-rw-r--r--  compiler/optimizing/stack_map_stream.h  143
-rw-r--r--  compiler/optimizing/stack_map_test.cc  161
32 files changed, 1989 insertions, 668 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ed3f949afe..561dcb7315 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -547,7 +547,9 @@ void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) {
stack_map_stream_.FillIn(region);
}
-void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
+void CodeGenerator::RecordPcInfo(HInstruction* instruction,
+ uint32_t dex_pc,
+ SlowPathCode* slow_path) {
if (instruction != nullptr) {
// The code generated for some type conversions may call the
// runtime, thus normally requiring a subsequent call to this
@@ -577,20 +579,17 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
pc_info.native_pc = GetAssembler()->CodeSize();
pc_infos_.Add(pc_info);
- // Populate stack map information.
+ uint32_t inlining_depth = 0;
if (instruction == nullptr) {
// For stack overflow checks.
- stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, 0);
+ stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth);
return;
}
-
LocationSummary* locations = instruction->GetLocations();
HEnvironment* environment = instruction->GetEnvironment();
-
size_t environment_size = instruction->EnvironmentSize();
- size_t inlining_depth = 0;
uint32_t register_mask = locations->GetRegisterMask();
if (locations->OnlyCallsOnSlowPath()) {
// In case of slow path, we currently set the location of caller-save registers
@@ -602,15 +601,18 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
}
// The register mask must be a subset of callee-save registers.
DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
- stack_map_stream_.AddStackMapEntry(
- dex_pc, pc_info.native_pc, register_mask,
- locations->GetStackMask(), environment_size, inlining_depth);
+ stack_map_stream_.AddStackMapEntry(dex_pc,
+ pc_info.native_pc,
+ register_mask,
+ locations->GetStackMask(),
+ environment_size,
+ inlining_depth);
// Walk over the environment, and record the location of dex registers.
for (size_t i = 0; i < environment_size; ++i) {
HInstruction* current = environment->GetInstructionAt(i);
if (current == nullptr) {
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kNone, 0);
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kNone, 0);
continue;
}
@@ -620,81 +622,132 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
DCHECK_EQ(current, location.GetConstant());
if (current->IsLongConstant()) {
int64_t value = current->AsLongConstant()->GetValue();
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value));
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, High32Bits(value));
- ++i;
+ stack_map_stream_.AddDexRegisterEntry(
+ i, DexRegisterLocation::Kind::kConstant, Low32Bits(value));
+ stack_map_stream_.AddDexRegisterEntry(
+ ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value));
DCHECK_LT(i, environment_size);
} else if (current->IsDoubleConstant()) {
int64_t value = bit_cast<double, int64_t>(current->AsDoubleConstant()->GetValue());
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value));
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, High32Bits(value));
- ++i;
+ stack_map_stream_.AddDexRegisterEntry(
+ i, DexRegisterLocation::Kind::kConstant, Low32Bits(value));
+ stack_map_stream_.AddDexRegisterEntry(
+ ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value));
DCHECK_LT(i, environment_size);
} else if (current->IsIntConstant()) {
int32_t value = current->AsIntConstant()->GetValue();
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value);
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value);
} else if (current->IsNullConstant()) {
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, 0);
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
} else {
- DCHECK(current->IsFloatConstant());
+ DCHECK(current->IsFloatConstant()) << current->DebugName();
int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue());
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value);
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value);
}
break;
}
case Location::kStackSlot: {
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, location.GetStackIndex());
+ stack_map_stream_.AddDexRegisterEntry(
+ i, DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
break;
}
case Location::kDoubleStackSlot: {
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack, location.GetStackIndex());
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInStack,
- location.GetHighStackIndex(kVRegSize));
- ++i;
+ stack_map_stream_.AddDexRegisterEntry(
+ i, DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
+ stack_map_stream_.AddDexRegisterEntry(
+ ++i, DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize));
DCHECK_LT(i, environment_size);
break;
}
case Location::kRegister : {
int id = location.reg();
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id);
- if (current->GetType() == Primitive::kPrimLong) {
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, id);
- ++i;
- DCHECK_LT(i, environment_size);
+ if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) {
+ uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id);
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset);
+ if (current->GetType() == Primitive::kPrimLong) {
+ stack_map_stream_.AddDexRegisterEntry(
+ ++i, DexRegisterLocation::Kind::kInStack, offset + kVRegSize);
+ DCHECK_LT(i, environment_size);
+ }
+ } else {
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInRegister, id);
+ if (current->GetType() == Primitive::kPrimLong) {
+ stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInRegister, id);
+ DCHECK_LT(i, environment_size);
+ }
}
break;
}
case Location::kFpuRegister : {
int id = location.reg();
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id);
- if (current->GetType() == Primitive::kPrimDouble) {
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, id);
- ++i;
- DCHECK_LT(i, environment_size);
+ if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) {
+ uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id);
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset);
+ if (current->GetType() == Primitive::kPrimDouble) {
+ stack_map_stream_.AddDexRegisterEntry(
+ ++i, DexRegisterLocation::Kind::kInStack, offset + kVRegSize);
+ DCHECK_LT(i, environment_size);
+ }
+ } else {
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInFpuRegister, id);
+ if (current->GetType() == Primitive::kPrimDouble) {
+ stack_map_stream_.AddDexRegisterEntry(
+ ++i, DexRegisterLocation::Kind::kInFpuRegister, id);
+ DCHECK_LT(i, environment_size);
+ }
}
break;
}
case Location::kFpuRegisterPair : {
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.low());
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.high());
- ++i;
+ int low = location.low();
+ int high = location.high();
+ if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) {
+ uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(low);
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset);
+ } else {
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInFpuRegister, low);
+ }
+ if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(high)) {
+ uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(high);
+ stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInStack, offset);
+ } else {
+ stack_map_stream_.AddDexRegisterEntry(
+ ++i, DexRegisterLocation::Kind::kInFpuRegister, high);
+ }
DCHECK_LT(i, environment_size);
break;
}
case Location::kRegisterPair : {
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.low());
- stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.high());
- ++i;
+ int low = location.low();
+ int high = location.high();
+ if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) {
+ uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(low);
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset);
+ } else {
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInRegister, low);
+ }
+ if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(high)) {
+ uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(high);
+ stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInStack, offset);
+ } else {
+ stack_map_stream_.AddDexRegisterEntry(
+ ++i, DexRegisterLocation::Kind::kInRegister, high);
+ }
DCHECK_LT(i, environment_size);
break;
}
+ case Location::kInvalid: {
+ stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kNone, 0);
+ break;
+ }
+
default:
LOG(FATAL) << "Unexpected kind " << location.GetKind();
}
@@ -735,78 +788,86 @@ void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) {
}
}
-void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
+void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const {
+ LocationSummary* locations = suspend_check->GetLocations();
+ HBasicBlock* block = suspend_check->GetBlock();
+ DCHECK(block->GetLoopInformation()->GetSuspendCheck() == suspend_check);
+ DCHECK(block->IsLoopHeader());
+
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* current = it.Current();
+ LiveInterval* interval = current->GetLiveInterval();
+ // We only need to clear bits of loop phis containing objects and allocated in register.
+ // Loop phis allocated on stack already have the object in the stack.
+ if (current->GetType() == Primitive::kPrimNot
+ && interval->HasRegister()
+ && interval->HasSpillSlot()) {
+ locations->ClearStackBit(interval->GetSpillSlot() / kVRegSize);
+ }
+ }
+}
+
+void CodeGenerator::EmitParallelMoves(Location from1, Location to1, Location from2, Location to2) {
+ HParallelMove parallel_move(GetGraph()->GetArena());
+ parallel_move.AddMove(from1, to1, nullptr);
+ parallel_move.AddMove(from2, to2, nullptr);
+ GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+void SlowPathCode::RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc) {
+ codegen->RecordPcInfo(instruction, dex_pc, this);
+}
+
+void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
RegisterSet* register_set = locations->GetLiveRegisters();
- size_t stack_offset = first_register_slot_in_slow_path_;
- for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
- if (!IsCoreCalleeSaveRegister(i)) {
+ size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (!codegen->IsCoreCalleeSaveRegister(i)) {
if (register_set->ContainsCoreRegister(i)) {
// If the register holds an object, update the stack mask.
if (locations->RegisterContainsObject(i)) {
locations->SetStackBit(stack_offset / kVRegSize);
}
- DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
- stack_offset += SaveCoreRegister(stack_offset, i);
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_core_stack_offsets_[i] = stack_offset;
+ stack_offset += codegen->SaveCoreRegister(stack_offset, i);
}
}
}
- for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
- if (!IsFloatingPointCalleeSaveRegister(i)) {
+ for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+ if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
if (register_set->ContainsFloatingPointRegister(i)) {
- DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
- stack_offset += SaveFloatingPointRegister(stack_offset, i);
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_fpu_stack_offsets_[i] = stack_offset;
+ stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i);
}
}
}
}
-void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) {
+void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
RegisterSet* register_set = locations->GetLiveRegisters();
- size_t stack_offset = first_register_slot_in_slow_path_;
- for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
- if (!IsCoreCalleeSaveRegister(i)) {
+ size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (!codegen->IsCoreCalleeSaveRegister(i)) {
if (register_set->ContainsCoreRegister(i)) {
- DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
- stack_offset += RestoreCoreRegister(stack_offset, i);
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
}
}
}
- for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
- if (!IsFloatingPointCalleeSaveRegister(i)) {
+ for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+ if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
if (register_set->ContainsFloatingPointRegister(i)) {
- DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
- stack_offset += RestoreFloatingPointRegister(stack_offset, i);
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
}
}
}
}
-void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const {
- LocationSummary* locations = suspend_check->GetLocations();
- HBasicBlock* block = suspend_check->GetBlock();
- DCHECK(block->GetLoopInformation()->GetSuspendCheck() == suspend_check);
- DCHECK(block->IsLoopHeader());
-
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- HInstruction* current = it.Current();
- LiveInterval* interval = current->GetLiveInterval();
- // We only need to clear bits of loop phis containing objects and allocated in register.
- // Loop phis allocated on stack already have the object in the stack.
- if (current->GetType() == Primitive::kPrimNot
- && interval->HasRegister()
- && interval->HasSpillSlot()) {
- locations->ClearStackBit(interval->GetSpillSlot() / kVRegSize);
- }
- }
-}
-
-void CodeGenerator::EmitParallelMoves(Location from1, Location to1, Location from2, Location to2) {
- HParallelMove parallel_move(GetGraph()->GetArena());
- parallel_move.AddMove(from1, to1, nullptr);
- parallel_move.AddMove(from2, to2, nullptr);
- GetMoveResolver()->EmitNativeCode(&parallel_move);
-}
-
} // namespace art
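
The hunks above thread the new SlowPathCode* argument through RecordPcInfo: when a runtime call is made from a slow path that has spilled a register holding an environment value, the stack map now reports that value at the slow path's spill slot rather than in the register. Below is a condensed sketch of that per-slot decision for a single core register; it reuses the names introduced by the patch (IsCoreRegisterSaved, GetStackOffsetOfCoreRegister, AddDexRegisterEntry), but the helper function itself is hypothetical and assumes ART's stack-map headers are available.

    // Hypothetical helper, compilable only inside ART where StackMapStream,
    // SlowPathCode and DexRegisterLocation are declared. It mirrors the
    // Location::kRegister case of RecordPcInfo above.
    static void RecordCoreRegisterSlot(StackMapStream* stream,
                                       SlowPathCode* slow_path,
                                       size_t vreg_index,
                                       int reg_id) {
      if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(reg_id)) {
        // The slow path saved this register before the runtime call, so the
        // value currently lives at a known stack offset.
        uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(reg_id);
        stream->AddDexRegisterEntry(vreg_index, DexRegisterLocation::Kind::kInStack, offset);
      } else {
        // No spill: the value is still live in the register at the call site.
        stream->AddDexRegisterEntry(vreg_index, DexRegisterLocation::Kind::kInRegister, reg_id);
      }
    }
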
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 5146afad8d..667f686059 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -29,8 +29,6 @@
namespace art {
-static size_t constexpr kVRegSize = 4;
-
// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
@@ -68,12 +66,42 @@ struct PcInfo {
class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
public:
- SlowPathCode() {}
+ SlowPathCode() {
+ for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
+ saved_core_stack_offsets_[i] = kRegisterNotSaved;
+ saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
+ }
+ }
+
virtual ~SlowPathCode() {}
virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
+ void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+ void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+ void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);
+
+ bool IsCoreRegisterSaved(int reg) const {
+ return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
+ }
+
+ bool IsFpuRegisterSaved(int reg) const {
+ return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
+ }
+
+ uint32_t GetStackOffsetOfCoreRegister(int reg) const {
+ return saved_core_stack_offsets_[reg];
+ }
+
+ uint32_t GetStackOffsetOfFpuRegister(int reg) const {
+ return saved_fpu_stack_offsets_[reg];
+ }
+
private:
+ static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
+ static constexpr uint32_t kRegisterNotSaved = -1;
+ uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
+ uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
@@ -153,17 +181,13 @@ class CodeGenerator {
virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
// Restores the register from the stack. Returns the size taken on stack.
virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
- virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- UNUSED(stack_index, reg_id);
- UNIMPLEMENTED(FATAL);
- UNREACHABLE();
- }
- virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- UNUSED(stack_index, reg_id);
- UNIMPLEMENTED(FATAL);
- UNREACHABLE();
- }
+
+ virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
+ virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
+
virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
+ // Returns whether we should split long moves in parallel moves.
+ virtual bool ShouldSplitLongMoves() const { return false; }
bool IsCoreCalleeSaveRegister(int reg) const {
return (core_callee_save_mask_ & (1 << reg)) != 0;
@@ -173,7 +197,7 @@ class CodeGenerator {
return (fpu_callee_save_mask_ & (1 << reg)) != 0;
}
- void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
+ void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
bool CanMoveNullCheckToUser(HNullCheck* null_check);
void MaybeRecordImplicitNullCheck(HInstruction* instruction);
@@ -186,8 +210,6 @@ class CodeGenerator {
void BuildNativeGCMap(
std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
void BuildStackMaps(std::vector<uint8_t>* vector);
- void SaveLiveRegisters(LocationSummary* locations);
- void RestoreLiveRegisters(LocationSummary* locations);
bool IsLeafMethod() const {
return is_leaf_;
@@ -271,6 +293,15 @@ class CodeGenerator {
}
}
+ size_t GetFirstRegisterSlotInSlowPath() const {
+ return first_register_slot_in_slow_path_;
+ }
+
+ uint32_t FrameEntrySpillSize() const {
+ return GetFpuSpillSize() + GetCoreSpillSize();
+ }
+
+
protected:
CodeGenerator(HGraph* graph,
size_t number_of_core_registers,
@@ -330,10 +361,6 @@ class CodeGenerator {
return POPCOUNT(core_spill_mask_) * GetWordSize();
}
- uint32_t FrameEntrySpillSize() const {
- return GetFpuSpillSize() + GetCoreSpillSize();
- }
-
bool HasAllocatedCalleeSaveRegisters() const {
// We check the core registers against 1 because it always comprises the return PC.
return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
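
The header hunk above gives SlowPathCode two per-register offset tables initialized to a kRegisterNotSaved sentinel; SaveLiveRegisters fills them in and RecordPcInfo queries them. The following is a minimal standalone model of that bookkeeping (plain C++, not ART code; the class and the MarkCoreRegisterSaved method are made up for illustration, while the two query methods mirror the ones added above).

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Standalone model of the saved-offset bookkeeping added to SlowPathCode.
    class SavedRegisterOffsets {
     public:
      SavedRegisterOffsets() {
        for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
          saved_core_stack_offsets_[i] = kRegisterNotSaved;
        }
      }
      // Stands in for the point where SaveLiveRegisters records the spill offset.
      void MarkCoreRegisterSaved(int reg, uint32_t stack_offset) {
        saved_core_stack_offsets_[reg] = stack_offset;
      }
      bool IsCoreRegisterSaved(int reg) const {
        return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
      }
      uint32_t GetStackOffsetOfCoreRegister(int reg) const {
        return saved_core_stack_offsets_[reg];
      }
     private:
      static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
      static constexpr uint32_t kRegisterNotSaved = -1;  // 0xFFFFFFFF never collides with a real offset.
      uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
    };

    int main() {
      SavedRegisterOffsets offsets;
      assert(!offsets.IsCoreRegisterSaved(5));
      offsets.MarkCoreRegisterSaved(5, 16);  // e.g. a slow path spilled register 5 at SP + 16.
      assert(offsets.IsCoreRegisterSaved(5));
      assert(offsets.GetStackOffsetOfCoreRegister(5) == 16u);
      return 0;
    }
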
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0403af1eaf..689f5357f4 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -82,7 +82,7 @@ class NullCheckSlowPathARM : public SlowPathCodeARM {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
arm_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc());
+ QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
}
private:
@@ -98,7 +98,7 @@ class DivZeroCheckSlowPathARM : public SlowPathCodeARM {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
arm_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc());
+ QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
}
private:
@@ -114,10 +114,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(instruction_->GetLocations());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
arm_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc());
- codegen->RestoreLiveRegisters(instruction_->GetLocations());
+ QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
+ RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ b(GetReturnLabel());
} else {
@@ -162,7 +162,7 @@ class BoundsCheckSlowPathARM : public SlowPathCodeARM {
length_location_,
Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
arm_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc());
+ QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
}
private:
@@ -188,7 +188,7 @@ class LoadClassSlowPathARM : public SlowPathCodeARM {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
__ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
@@ -196,7 +196,7 @@ class LoadClassSlowPathARM : public SlowPathCodeARM {
int32_t entry_point_offset = do_clinit_
? QUICK_ENTRY_POINT(pInitializeStaticStorage)
: QUICK_ENTRY_POINT(pInitializeType);
- arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_);
+ arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
// Move the class to the desired location.
Location out = locations->Out();
@@ -204,7 +204,7 @@ class LoadClassSlowPathARM : public SlowPathCodeARM {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
}
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ b(GetExitLabel());
}
@@ -235,16 +235,16 @@ class LoadStringSlowPathARM : public SlowPathCodeARM {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
__ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
arm_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc());
+ QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ b(GetExitLabel());
}
@@ -272,7 +272,7 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
@@ -284,14 +284,15 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM {
Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
if (instruction_->IsInstanceOf()) {
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_);
+ arm_codegen->InvokeRuntime(
+ QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_, this);
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
} else {
DCHECK(instruction_->IsCheckCast());
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_);
+ arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this);
}
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ b(GetExitLabel());
}
@@ -857,10 +858,11 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr
void CodeGeneratorARM::InvokeRuntime(int32_t entry_point_offset,
HInstruction* instruction,
- uint32_t dex_pc) {
+ uint32_t dex_pc,
+ SlowPathCode* slow_path) {
__ LoadFromOffset(kLoadWord, LR, TR, entry_point_offset);
__ blx(LR);
- RecordPcInfo(instruction, dex_pc);
+ RecordPcInfo(instruction, dex_pc, slow_path);
DCHECK(instruction->IsSuspendCheck()
|| instruction->IsBoundsCheck()
|| instruction->IsNullCheck()
@@ -901,10 +903,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARM::VisitExit(HExit* exit) {
UNUSED(exit);
- if (kIsDebugBuild) {
- __ Comment("Unreachable");
- __ bkpt(0);
- }
}
void LocationsBuilderARM::VisitIf(HIf* if_instr) {
@@ -1205,6 +1203,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
codegen_->GenerateStaticOrDirectCall(invoke, temp);
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
@@ -1673,14 +1672,16 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
// Processing a Dex `float-to-long' instruction.
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l),
conversion,
- conversion->GetDexPc());
+ conversion->GetDexPc(),
+ nullptr);
break;
case Primitive::kPrimDouble:
// Processing a Dex `double-to-long' instruction.
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pD2l),
conversion,
- conversion->GetDexPc());
+ conversion->GetDexPc(),
+ nullptr);
break;
default:
@@ -2134,7 +2135,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) {
DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc());
+ codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr);
break;
}
@@ -2228,17 +2229,17 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) {
}
case Primitive::kPrimLong: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc());
+ codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr);
break;
}
case Primitive::kPrimFloat: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc());
+ codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr);
break;
}
case Primitive::kPrimDouble: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc());
+ codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr);
break;
}
@@ -2455,7 +2456,8 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
__ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(),
instruction,
- instruction->GetDexPc());
+ instruction->GetDexPc(),
+ nullptr);
}
void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
@@ -2474,7 +2476,8 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) {
__ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(),
instruction,
- instruction->GetDexPc());
+ instruction->GetDexPc(),
+ nullptr);
}
void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
@@ -3204,7 +3207,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
DCHECK_EQ(value_type, Primitive::kPrimNot);
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
instruction,
- instruction->GetDexPc());
+ instruction->GetDexPc(),
+ nullptr);
}
break;
}
@@ -3691,7 +3695,7 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) {
void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) {
codegen_->InvokeRuntime(
- QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc());
+ QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
}
void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -3784,7 +3788,8 @@ void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instr
codegen_->InvokeRuntime(instruction->IsEnter()
? QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject),
instruction,
- instruction->GetDexPc());
+ instruction->GetDexPc(),
+ nullptr);
}
void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
@@ -3888,7 +3893,6 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
__ bl(GetFrameEntryLabel());
}
- RecordPcInfo(invoke, invoke->GetDexPc());
DCHECK(!IsLeafMethod());
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index f1a3729c13..57e1d2f2f5 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -246,7 +246,8 @@ class CodeGeneratorARM : public CodeGenerator {
void LoadCurrentMethod(Register reg);
// Generate code to invoke a runtime entry point.
- void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc);
+ void InvokeRuntime(
+ int32_t offset, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path);
// Emit a write barrier.
void MarkGCCard(Register temp, Register card, Register object, Register value);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c21084a6fe..aeec5dd1c4 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -147,7 +147,7 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
index_location_, LocationFrom(calling_convention.GetRegisterAt(0)),
length_location_, LocationFrom(calling_convention.GetRegisterAt(1)));
arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc());
+ QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
@@ -167,7 +167,7 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc());
+ QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
@@ -191,14 +191,14 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
__ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex());
arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W());
int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
: QUICK_ENTRY_POINT(pInitializeType);
- arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_);
+ arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
if (do_clinit_) {
CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*>();
} else {
@@ -213,7 +213,7 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
}
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
}
@@ -244,18 +244,18 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W());
__ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex());
arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc());
+ QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t, mirror::ArtMethod*>();
Primitive::Type type = instruction_->GetType();
arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
}
@@ -273,7 +273,7 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc());
+ QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
@@ -292,11 +292,11 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(instruction_->GetLocations());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
arm64_codegen->InvokeRuntime(
- QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc());
+ QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- codegen->RestoreLiveRegisters(instruction_->GetLocations());
+ RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -338,7 +338,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
@@ -348,7 +348,8 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
object_class_, LocationFrom(calling_convention.GetRegisterAt(1)));
if (instruction_->IsInstanceOf()) {
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_);
+ arm64_codegen->InvokeRuntime(
+ QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_, this);
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
@@ -356,11 +357,11 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_);
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this);
CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
}
@@ -984,11 +985,12 @@ void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) {
void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset,
HInstruction* instruction,
- uint32_t dex_pc) {
+ uint32_t dex_pc,
+ SlowPathCode* slow_path) {
__ Ldr(lr, MemOperand(tr, entry_point_offset));
__ Blr(lr);
if (instruction != nullptr) {
- RecordPcInfo(instruction, dex_pc);
+ RecordPcInfo(instruction, dex_pc, slow_path);
DCHECK(instruction->IsSuspendCheck()
|| instruction->IsBoundsCheck()
|| instruction->IsNullCheck()
@@ -1298,7 +1300,8 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
Primitive::Type value_type = instruction->GetComponentType();
if (value_type == Primitive::kPrimNot) {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc());
+ codegen_->InvokeRuntime(
+ QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr);
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
} else {
LocationSummary* locations = instruction->GetLocations();
@@ -1593,10 +1596,6 @@ void LocationsBuilderARM64::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARM64::VisitExit(HExit* exit) {
UNUSED(exit);
- if (kIsDebugBuild) {
- down_cast<Arm64Assembler*>(GetAssembler())->Comment("Unreachable");
- __ Brk(__LINE__); // TODO: Introduce special markers for such code locations.
- }
}
void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
@@ -1920,7 +1919,6 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
__ Bl(&frame_entry_label_);
}
- RecordPcInfo(invoke, invoke->GetDexPc());
DCHECK(!IsLeafMethod());
}
@@ -1931,6 +1929,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
codegen_->GenerateStaticOrDirectCall(invoke, temp);
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -2066,7 +2065,8 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins
codegen_->InvokeRuntime(instruction->IsEnter()
? QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject),
instruction,
- instruction->GetDexPc());
+ instruction->GetDexPc(),
+ nullptr);
CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
}
@@ -2172,7 +2172,8 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
codegen_->InvokeRuntime(
GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(),
instruction,
- instruction->GetDexPc());
+ instruction->GetDexPc(),
+ nullptr);
CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
void*, uint32_t, int32_t, mirror::ArtMethod*>();
}
@@ -2198,7 +2199,8 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction)
codegen_->InvokeRuntime(
GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(),
instruction,
- instruction->GetDexPc());
+ instruction->GetDexPc(),
+ nullptr);
CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>();
}
@@ -2352,7 +2354,7 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
case Primitive::kPrimDouble: {
int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
: QUICK_ENTRY_POINT(pFmod);
- codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc());
+ codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr);
break;
}
@@ -2527,7 +2529,7 @@ void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
codegen_->InvokeRuntime(
- QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc());
+ QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 48961d68e9..cbb2e5c749 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -300,7 +300,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
// Generate code to invoke a runtime entry point.
- void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc);
+ void InvokeRuntime(int32_t offset,
+ HInstruction* instruction,
+ uint32_t dex_pc,
+ SlowPathCode* slow_path);
ParallelMoveResolverARM64* GetMoveResolver() { return &move_resolver_; }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 07d88deffa..754dd1088d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -158,16 +158,16 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 {
class SuspendCheckSlowPathX86 : public SlowPathCodeX86 {
public:
- explicit SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
+ SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
: instruction_(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(instruction_->GetLocations());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend)));
codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
- codegen->RestoreLiveRegisters(instruction_->GetLocations());
+ RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -198,15 +198,15 @@ class LoadStringSlowPathX86 : public SlowPathCodeX86 {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1));
__ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex()));
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString)));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -231,7 +231,7 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 {
LocationSummary* locations = at_->GetLocations();
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
__ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex()));
@@ -239,7 +239,7 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 {
__ fs()->call(Address::Absolute(do_clinit_
? QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeStaticStorage)
: QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeType)));
- codegen->RecordPcInfo(at_, dex_pc_);
+ RecordPcInfo(codegen, at_, dex_pc_);
// Move the class to the desired location.
Location out = locations->Out();
@@ -248,7 +248,7 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 {
x86_codegen->Move32(out, Location::RegisterLocation(EAX));
}
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -287,7 +287,7 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
@@ -306,11 +306,11 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 {
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pCheckCast)));
}
- codegen->RecordPcInfo(instruction_, dex_pc_);
+ RecordPcInfo(codegen, instruction_, dex_pc_);
if (instruction_->IsInstanceOf()) {
x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
}
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -673,8 +673,19 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
source.AsRegisterPairHigh<Register>());
} else if (source.IsFpuRegister()) {
__ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
+ } else if (source.IsConstant()) {
+ HConstant* constant = source.GetConstant();
+ int64_t value;
+ if (constant->IsLongConstant()) {
+ value = constant->AsLongConstant()->GetValue();
+ } else {
+ DCHECK(constant->IsDoubleConstant());
+ value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+ }
+ __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
+ __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
} else {
- DCHECK(source.IsDoubleStackSlot());
+ DCHECK(source.IsDoubleStackSlot()) << source;
EmitParallelMoves(
Location::StackSlot(source.GetStackIndex()),
Location::StackSlot(destination.GetStackIndex()),
@@ -801,10 +812,6 @@ void LocationsBuilderX86::VisitExit(HExit* exit) {
void InstructionCodeGeneratorX86::VisitExit(HExit* exit) {
UNUSED(exit);
- if (kIsDebugBuild) {
- __ Comment("Unreachable");
- __ int3();
- }
}
void LocationsBuilderX86::VisitIf(HIf* if_instr) {
@@ -1555,8 +1562,6 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
// Processing a Dex `int-to-byte' instruction.
if (in.IsRegister()) {
__ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
- } else if (in.IsStackSlot()) {
- __ movsxb(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
} else {
DCHECK(in.GetConstant()->IsIntConstant());
int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
@@ -1760,6 +1765,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
__ addsd(result, temp);
// result = double-to-float(result)
__ cvtsd2ss(result, result);
+ // Restore low.
+ __ addl(low, Immediate(0x80000000));
break;
}
@@ -1807,6 +1814,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
__ addsd(result, constant);
// result = result + temp
__ addsd(result, temp);
+ // Restore low.
+ __ addl(low, Immediate(0x80000000));
break;
}
@@ -1892,10 +1901,15 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
if (second.IsRegisterPair()) {
__ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
__ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
- } else {
+ } else if (second.IsDoubleStackSlot()) {
__ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
__ adcl(first.AsRegisterPairHigh<Register>(),
Address(ESP, second.GetHighStackIndex(kX86WordSize)));
+ } else {
+ DCHECK(second.IsConstant()) << second;
+ int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+ __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
+ __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
}
break;
}
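
The new constant arm of the long add relies on the usual carry chain: addl adds the low 32-bit halves, then adcl adds the high halves plus the carry produced by the low add (the subl/sbbl arm in the next hunk uses the borrow the same way). A small standalone check of that identity (plain C++, not ART code):

    #include <cassert>
    #include <cstdint>

    // Models addl on the low words followed by adcl on the high words.
    uint64_t Add64ViaPair(uint32_t lo1, uint32_t hi1, uint32_t lo2, uint32_t hi2) {
      uint32_t lo = lo1 + lo2;                // addl: wraps modulo 2^32
      uint32_t carry = (lo < lo1) ? 1u : 0u;  // the CF that addl would set
      uint32_t hi = hi1 + hi2 + carry;        // adcl
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }

    int main() {
      uint64_t a = 0x00000001FFFFFFFFULL;
      uint64_t b = 0x0000000000000001ULL;  // the low-word add overflows, so the carry matters
      uint64_t sum = Add64ViaPair(static_cast<uint32_t>(a), static_cast<uint32_t>(a >> 32),
                                  static_cast<uint32_t>(b), static_cast<uint32_t>(b >> 32));
      assert(sum == a + b);
      return 0;
    }
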
@@ -1965,10 +1979,15 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
if (second.IsRegisterPair()) {
__ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
__ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
- } else {
+ } else if (second.IsDoubleStackSlot()) {
__ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
__ sbbl(first.AsRegisterPairHigh<Register>(),
Address(ESP, second.GetHighStackIndex(kX86WordSize)));
+ } else {
+ DCHECK(second.IsConstant()) << second;
+ int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+ __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
+ __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
}
break;
}
@@ -1999,12 +2018,6 @@ void LocationsBuilderX86::VisitMul(HMul* mul) {
break;
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- // TODO: Currently this handles only stack operands:
- // - we don't have enough registers because we currently use Quick ABI.
- // - by the time we have a working register allocator we will probably change the ABI
- // and fix the above.
- // - we don't have a way yet to request operands on stack but the base line compiler
- // will leave the operands on the stack with Any().
locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
// Needed for imul on 32bits with 64bits output.
@@ -2046,39 +2059,83 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
}
case Primitive::kPrimLong: {
- DCHECK(second.IsDoubleStackSlot());
-
Register in1_hi = first.AsRegisterPairHigh<Register>();
Register in1_lo = first.AsRegisterPairLow<Register>();
- Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
- Address in2_lo(ESP, second.GetStackIndex());
Register eax = locations->GetTemp(0).AsRegister<Register>();
Register edx = locations->GetTemp(1).AsRegister<Register>();
DCHECK_EQ(EAX, eax);
DCHECK_EQ(EDX, edx);
- // input: in1 - 64 bits, in2 - 64 bits
+ // input: in1 - 64 bits, in2 - 64 bits.
// output: in1
// formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
// parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
// parts: in1.lo = (in1.lo * in2.lo)[31:0]
-
- __ movl(eax, in2_hi);
- // eax <- in1.lo * in2.hi
- __ imull(eax, in1_lo);
- // in1.hi <- in1.hi * in2.lo
- __ imull(in1_hi, in2_lo);
- // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
- __ addl(in1_hi, eax);
- // move in1_lo to eax to prepare for double precision
- __ movl(eax, in1_lo);
- // edx:eax <- in1.lo * in2.lo
- __ mull(in2_lo);
- // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
- __ addl(in1_hi, edx);
- // in1.lo <- (in1.lo * in2.lo)[31:0];
- __ movl(in1_lo, eax);
+ if (second.IsConstant()) {
+ DCHECK(second.GetConstant()->IsLongConstant());
+
+ int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+ int32_t low_value = Low32Bits(value);
+ int32_t high_value = High32Bits(value);
+ Immediate low(low_value);
+ Immediate high(high_value);
+
+ __ movl(eax, high);
+ // eax <- in1.lo * in2.hi
+ __ imull(eax, in1_lo);
+ // in1.hi <- in1.hi * in2.lo
+ __ imull(in1_hi, low);
+ // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
+ __ addl(in1_hi, eax);
+ // move in2_lo to eax to prepare for double precision
+ __ movl(eax, low);
+ // edx:eax <- in1.lo * in2.lo
+ __ mull(in1_lo);
+ // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
+ __ addl(in1_hi, edx);
+ // in1.lo <- (in1.lo * in2.lo)[31:0];
+ __ movl(in1_lo, eax);
+ } else if (second.IsRegisterPair()) {
+ Register in2_hi = second.AsRegisterPairHigh<Register>();
+ Register in2_lo = second.AsRegisterPairLow<Register>();
+
+ __ movl(eax, in2_hi);
+ // eax <- in1.lo * in2.hi
+ __ imull(eax, in1_lo);
+ // in1.hi <- in1.hi * in2.lo
+ __ imull(in1_hi, in2_lo);
+ // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
+ __ addl(in1_hi, eax);
+ // move in1_lo to eax to prepare for double precision
+ __ movl(eax, in1_lo);
+ // edx:eax <- in1.lo * in2.lo
+ __ mull(in2_lo);
+ // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
+ __ addl(in1_hi, edx);
+ // in1.lo <- (in1.lo * in2.lo)[31:0];
+ __ movl(in1_lo, eax);
+ } else {
+ DCHECK(second.IsDoubleStackSlot()) << second;
+ Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
+ Address in2_lo(ESP, second.GetStackIndex());
+
+ __ movl(eax, in2_hi);
+ // eax <- in1.lo * in2.hi
+ __ imull(eax, in1_lo);
+ // in1.hi <- in1.hi * in2.lo
+ __ imull(in1_hi, in2_lo);
+ // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
+ __ addl(in1_hi, eax);
+ // move in1_lo to eax to prepare for double precision
+ __ movl(eax, in1_lo);
+ // edx:eax <- in1.lo * in2.lo
+ __ mull(in2_lo);
+ // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
+ __ addl(in1_hi, edx);
+ // in1.lo <- (in1.lo * in2.lo)[31:0];
+ __ movl(in1_lo, eax);
+ }
break;
}
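
The comments in the multiplication hunk state the decomposition all three arms implement: with the 64-bit operands split into 32-bit halves, in1.hi:in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo) * 2^32 + in1.lo * in2.lo modulo 2^64, so the two imull cross products and the high half of the widening mull all land in the high word. A standalone check of that identity (plain C++, not ART code):

    #include <cassert>
    #include <cstdint>

    // Recomputes a 64-bit product from 32-bit halves the way the lowered code does:
    // two cross products (imull), one widening low product (mull), then the mull's
    // high half is added into the high word.
    uint64_t Mul64ViaParts(uint64_t a, uint64_t b) {
      uint32_t a_lo = static_cast<uint32_t>(a), a_hi = static_cast<uint32_t>(a >> 32);
      uint32_t b_lo = static_cast<uint32_t>(b), b_hi = static_cast<uint32_t>(b >> 32);
      uint64_t low_product = static_cast<uint64_t>(a_lo) * b_lo;         // mull: edx:eax
      uint64_t cross = static_cast<uint64_t>(a_lo) * b_hi
                     + static_cast<uint64_t>(a_hi) * b_lo;               // the two imull terms
      uint32_t hi = static_cast<uint32_t>(cross + (low_product >> 32));  // high word, modulo 2^32
      return (static_cast<uint64_t>(hi) << 32) | static_cast<uint32_t>(low_product);
    }

    int main() {
      uint64_t a = 0x123456789ABCDEF0ULL;
      uint64_t b = 0x0FEDCBA987654321ULL;
      assert(Mul64ViaParts(a, b) == a * b);  // a * b wraps modulo 2^64, like the lowered code
      return 0;
    }
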
@@ -2237,7 +2294,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
}
void LocationsBuilderX86::VisitDiv(HDiv* div) {
- LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong
+ LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong)
? LocationSummary::kCall
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
@@ -2306,8 +2363,10 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
void LocationsBuilderX86::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
+ LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
+ ? LocationSummary::kCall
+ : LocationSummary::kNoCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
switch (type) {
case Primitive::kPrimInt: {
@@ -2646,7 +2705,6 @@ void LocationsBuilderX86::VisitCompare(HCompare* compare) {
switch (compare->InputAt(0)->GetType()) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- // TODO: we set any here but we don't handle constants
locations->SetInAt(1, Location::Any());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
@@ -2674,18 +2732,24 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
case Primitive::kPrimLong: {
if (right.IsRegisterPair()) {
__ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>());
- } else {
- DCHECK(right.IsDoubleStackSlot());
+ } else if (right.IsDoubleStackSlot()) {
__ cmpl(left.AsRegisterPairHigh<Register>(),
Address(ESP, right.GetHighStackIndex(kX86WordSize)));
+ } else {
+ DCHECK(right.IsConstant()) << right;
+ __ cmpl(left.AsRegisterPairHigh<Register>(),
+ Immediate(High32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
}
__ j(kLess, &less); // Signed compare.
__ j(kGreater, &greater); // Signed compare.
if (right.IsRegisterPair()) {
__ cmpl(left.AsRegisterPairLow<Register>(), right.AsRegisterPairLow<Register>());
- } else {
- DCHECK(right.IsDoubleStackSlot());
+ } else if (right.IsDoubleStackSlot()) {
__ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex()));
+ } else {
+ DCHECK(right.IsConstant()) << right;
+ __ cmpl(left.AsRegisterPairLow<Register>(),
+ Immediate(Low32Bits(right.GetConstant()->AsLongConstant()->GetValue())));
}
break;
}
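
The new constant arm mirrors the register-pair and stack-slot arms: the constant is split with High32Bits/Low32Bits and the halves are compared in turn. The jumps emitted after the low-word comparison are outside this hunk; a compact sketch of the overall semantics (plain C++, illustrative only):

  #include <cstdint>

  int CompareInt64Via32(int64_t a, int64_t b) {
    int32_t a_hi = static_cast<int32_t>(static_cast<uint64_t>(a) >> 32);  // High32Bits
    int32_t b_hi = static_cast<int32_t>(static_cast<uint64_t>(b) >> 32);
    if (a_hi != b_hi) return a_hi < b_hi ? -1 : 1;  // signed compare, as with kLess/kGreater
    uint32_t a_lo = static_cast<uint32_t>(a);       // Low32Bits
    uint32_t b_lo = static_cast<uint32_t>(b);
    if (a_lo != b_lo) return a_lo < b_lo ? -1 : 1;  // the low words compare unsigned
    return 0;
  }
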
@@ -2770,7 +2834,12 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+
+ // The output overlaps in the long case: we don't want the move of the low half
+ // to overwrite the register holding the object before the high half is loaded.
+ locations->SetOut(Location::RequiresRegister(),
+ (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
// Long values can be loaded atomically into an XMM using movsd.
@@ -2827,6 +2896,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
__ psrlq(temp, Immediate(32));
__ movd(out.AsRegisterPairHigh<Register>(), temp);
} else {
+ DCHECK_NE(base, out.AsRegisterPairLow<Register>());
__ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
__ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
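
The new DCHECK and the kOutputOverlap setting above protect against the base register being reused for the low half of the output pair. Without the overlap, a bad allocation could look like this (hypothetical register choice):

  // Suppose out = (EAX, EDX) and base = EAX, with no output overlap:
  //   movl EAX, [EAX + offset]       // low word loaded, the object pointer is now gone
  //   movl EDX, [EAX + offset + 4]   // high word read through the loaded value, not the object
  // Marking the output as overlapping forces the allocator to keep the base distinct
  // from the low half of the output pair.
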
@@ -3064,7 +3134,11 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the long case: we don't want the move of the low half
+ // to overwrite the register holding the array before the high half is loaded.
+ locations->SetOut(Location::RequiresRegister(),
+ (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
}
void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -3138,6 +3212,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimLong: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
Location out = locations->Out();
+ DCHECK_NE(obj, out.AsRegisterPairLow<Register>());
if (index.IsConstant()) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
__ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset));
@@ -3569,8 +3644,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
DCHECK(destination.IsStackSlot()) << destination;
__ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
}
- } else {
- DCHECK(constant->IsFloatConstant());
+ } else if (constant->IsFloatConstant()) {
float value = constant->AsFloatConstant()->GetValue();
Immediate imm(bit_cast<float, int32_t>(value));
if (destination.IsFpuRegister()) {
@@ -3583,6 +3657,43 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
DCHECK(destination.IsStackSlot()) << destination;
__ movl(Address(ESP, destination.GetStackIndex()), imm);
}
+ } else if (constant->IsLongConstant()) {
+ int64_t value = constant->AsLongConstant()->GetValue();
+ int32_t low_value = Low32Bits(value);
+ int32_t high_value = High32Bits(value);
+ Immediate low(low_value);
+ Immediate high(high_value);
+ if (destination.IsDoubleStackSlot()) {
+ __ movl(Address(ESP, destination.GetStackIndex()), low);
+ __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
+ } else {
+ __ movl(destination.AsRegisterPairLow<Register>(), low);
+ __ movl(destination.AsRegisterPairHigh<Register>(), high);
+ }
+ } else {
+ DCHECK(constant->IsDoubleConstant());
+ double dbl_value = constant->AsDoubleConstant()->GetValue();
+ int64_t value = bit_cast<double, int64_t>(dbl_value);
+ int32_t low_value = Low32Bits(value);
+ int32_t high_value = High32Bits(value);
+ Immediate low(low_value);
+ Immediate high(high_value);
+ if (destination.IsFpuRegister()) {
+ XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
+ if (value == 0) {
+ // Easy handling of +0.0 (all-zero bit pattern); -0.0 takes the push/movsd path below.
+ __ xorpd(dest, dest);
+ } else {
+ __ pushl(high);
+ __ pushl(low);
+ __ movsd(dest, Address(ESP, 0));
+ __ addl(ESP, Immediate(8));
+ }
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ __ movl(Address(ESP, destination.GetStackIndex()), low);
+ __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
+ }
}
} else {
LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
@@ -3650,6 +3761,33 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) {
Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
} else if (destination.IsFpuRegister() && source.IsStackSlot()) {
Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
+ } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
+ // Take advantage of the 16 bytes in the XMM register.
+ XmmRegister reg = source.AsFpuRegister<XmmRegister>();
+ Address stack(ESP, destination.GetStackIndex());
+ // Load the stack slot's double into the high 64 bits (quadword) of the XMM register.
+ __ movhpd(reg, stack);
+
+ // Store the register's original double (its low 64 bits) into the stack slot.
+ __ movsd(stack, reg);
+
+ // Shift the double loaded from the stack down into the low 64 bits.
+ __ psrldq(reg, Immediate(8));
+ } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
+ // Take advantage of the 16 bytes in the XMM register.
+ XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
+ Address stack(ESP, source.GetStackIndex());
+ // Load the stack slot's double into the high 64 bits (quadword) of the XMM register.
+ __ movhpd(reg, stack);
+
+ // Store the register's original double (its low 64 bits) into the stack slot.
+ __ movsd(stack, reg);
+
+ // Shift the double loaded from the stack down into the low 64 bits.
+ __ psrldq(reg, Immediate(8));
+ } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
+ Exchange(destination.GetStackIndex(), source.GetStackIndex());
+ Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize));
} else {
LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
}
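
State of the three-instruction register/memory swap added above, assuming the XMM register starts out holding double A and the stack slot holds double B:

  //   movhpd xmm, [slot]    // xmm = { low: A, high: B }, the slot still holds B
  //   movsd  [slot], xmm    // slot = A (the low 64 bits of xmm)
  //   psrldq xmm, 8         // xmm = { low: B, high: 0 }: an 8-byte shift brings B down

This avoids needing a scratch register pair or a second XMM register for the swap.
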
@@ -3951,7 +4089,7 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr
__ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
__ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
}
- } else {
+ } else if (second.IsDoubleStackSlot()) {
if (instruction->IsAnd()) {
__ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
__ andl(first.AsRegisterPairHigh<Register>(),
@@ -3966,6 +4104,22 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr
__ xorl(first.AsRegisterPairHigh<Register>(),
Address(ESP, second.GetHighStackIndex(kX86WordSize)));
}
+ } else {
+ DCHECK(second.IsConstant()) << second;
+ int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
+ Immediate low(Low32Bits(value));
+ Immediate high(High32Bits(value));
+ if (instruction->IsAnd()) {
+ __ andl(first.AsRegisterPairLow<Register>(), low);
+ __ andl(first.AsRegisterPairHigh<Register>(), high);
+ } else if (instruction->IsOr()) {
+ __ orl(first.AsRegisterPairLow<Register>(), low);
+ __ orl(first.AsRegisterPairHigh<Register>(), high);
+ } else {
+ DCHECK(instruction->IsXor());
+ __ xorl(first.AsRegisterPairLow<Register>(), low);
+ __ xorl(first.AsRegisterPairHigh<Register>(), high);
+ }
}
}
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f5a9b7d1f7..c5763de05e 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -245,6 +245,8 @@ class CodeGeneratorX86 : public CodeGenerator {
return type == Primitive::kPrimLong;
}
+ bool ShouldSplitLongMoves() const OVERRIDE { return true; }
+
Label* GetFrameEntryLabel() { return &frame_entry_label_; }
private:
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 07ba95dcfb..dbd7c9e8ad 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -72,7 +72,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
__ Bind(GetEntryLabel());
__ gs()->call(
Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowNullPointer), true));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
private:
@@ -88,7 +88,7 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
__ Bind(GetEntryLabel());
__ gs()->call(
Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowDivZero), true));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
private:
@@ -136,10 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(instruction_->GetLocations());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
__ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
- codegen->RestoreLiveRegisters(instruction_->GetLocations());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
+ RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -181,7 +181,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
__ gs()->call(Address::Absolute(
QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowArrayBounds), true));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
private:
@@ -207,7 +207,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 {
CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
__ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
@@ -215,7 +215,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 {
__ gs()->call(Address::Absolute((do_clinit_
? QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeStaticStorage)
: QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeType)) , true));
- codegen->RecordPcInfo(at_, dex_pc_);
+ RecordPcInfo(codegen, at_, dex_pc_);
Location out = locations->Out();
// Move the class to the desired location.
@@ -224,7 +224,7 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 {
x64_codegen->Move(out, Location::RegisterLocation(RAX));
}
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -255,7 +255,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 {
CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
@@ -263,9 +263,9 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 {
Immediate(instruction_->GetStringIndex()));
__ gs()->call(Address::Absolute(
QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pResolveString), true));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -293,7 +293,7 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(locations);
+ SaveLiveRegisters(codegen, locations);
// We're moving two locations to locations that could overlap, so we need a parallel
// move resolver.
@@ -312,13 +312,13 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
__ gs()->call(
Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pCheckCast), true));
}
- codegen->RecordPcInfo(instruction_, dex_pc_);
+ RecordPcInfo(codegen, instruction_, dex_pc_);
if (instruction_->IsInstanceOf()) {
x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
}
- codegen->RestoreLiveRegisters(locations);
+ RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -374,7 +374,6 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
}
DCHECK(!IsLeafMethod());
- RecordPcInfo(invoke, invoke->GetDexPc());
}
void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -750,10 +749,6 @@ void LocationsBuilderX86_64::VisitExit(HExit* exit) {
void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) {
UNUSED(exit);
- if (kIsDebugBuild) {
- __ Comment("Unreachable");
- __ int3();
- }
}
void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
@@ -1216,6 +1211,7 @@ void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi
codegen_->GenerateStaticOrDirectCall(
invoke,
invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>());
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
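
The mechanical change across the slow paths in this file (and in the intrinsics files further down) is that register save/restore and PC recording now go through helpers that take the codegen as an argument, presumably provided by the SlowPathCode base class, instead of being public CodeGenerator methods. The resulting shape of a slow path is roughly (names illustrative):

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, instruction_->GetLocations());
    // ... set up arguments and emit the runtime call ...
    RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
    RestoreLiveRegisters(codegen, instruction_->GetLocations());
    __ jmp(GetExitLabel());
  }

Note also that RecordPcInfo for static/direct calls moves out of GenerateStaticOrDirectCall and into its callers (VisitInvokeStaticOrDirect and the intrinsic slow paths), so the PC is recorded exactly once per call site.
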
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index fca9933872..ec0cc3e98b 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -18,7 +18,28 @@
namespace art {
+// This visitor tries to simplify operations whose result is a known constant. For
+// example `input * 0` is replaced by the constant 0.
+class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
+ public:
+ explicit InstructionWithAbsorbingInputSimplifier(HGraph* graph) : HGraphVisitor(graph) {}
+
+ private:
+ void VisitShift(HBinaryOperation* shift);
+
+ void VisitAnd(HAnd* instruction) OVERRIDE;
+ void VisitMul(HMul* instruction) OVERRIDE;
+ void VisitOr(HOr* instruction) OVERRIDE;
+ void VisitRem(HRem* instruction) OVERRIDE;
+ void VisitShl(HShl* instruction) OVERRIDE;
+ void VisitShr(HShr* instruction) OVERRIDE;
+ void VisitSub(HSub* instruction) OVERRIDE;
+ void VisitUShr(HUShr* instruction) OVERRIDE;
+ void VisitXor(HXor* instruction) OVERRIDE;
+};
+
void HConstantFolding::Run() {
+ InstructionWithAbsorbingInputSimplifier simplifier(graph_);
// Process basic blocks in reverse post-order in the dominator tree,
// so that an instruction turned into a constant, used as input of
// another instruction, may possibly be used to turn that second
@@ -38,6 +59,8 @@ void HConstantFolding::Run() {
inst->AsBinaryOperation()->TryStaticEvaluation();
if (constant != nullptr) {
inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant);
+ } else {
+ inst->Accept(&simplifier);
}
} else if (inst->IsUnaryOperation()) {
// Constant folding: replace `op(a)' with a constant at compile
@@ -47,9 +70,166 @@ void HConstantFolding::Run() {
if (constant != nullptr) {
inst->GetBlock()->ReplaceAndRemoveInstructionWith(inst, constant);
}
+ } else if (inst->IsDivZeroCheck()) {
+ // We can safely remove the check if the input is a non-zero constant.
+ HDivZeroCheck* check = inst->AsDivZeroCheck();
+ HInstruction* check_input = check->InputAt(0);
+ if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) {
+ check->ReplaceWith(check_input);
+ check->GetBlock()->RemoveInstruction(check);
+ }
}
}
}
}
+void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
+ HInstruction* left = instruction->GetLeft();
+ if (left->IsConstant() && left->AsConstant()->IsZero()) {
+ // Replace code looking like
+ // SHL dst, 0, shift_amount
+ // with
+ // CONSTANT 0
+ instruction->ReplaceWith(left);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ if ((input_cst != nullptr) && input_cst->IsZero()) {
+ // Replace code looking like
+ // AND dst, src, 0
+ // with
+ // CONSTANT 0
+ instruction->ReplaceWith(input_cst);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitMul(HMul* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ Primitive::Type type = instruction->GetType();
+ if (Primitive::IsIntOrLongType(type) &&
+ (input_cst != nullptr) && input_cst->IsZero()) {
+ // Replace code looking like
+ // MUL dst, src, 0
+ // with
+ // CONSTANT 0
+ // Integral multiplication by zero always yields zero, but floating-point
+ // multiplication by zero does not always do so. For example `Infinity * 0.0`
+ // yields NaN.
+ instruction->ReplaceWith(input_cst);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitOr(HOr* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+
+ if (input_cst == nullptr) {
+ return;
+ }
+
+ if (Int64FromConstant(input_cst) == -1) {
+ // Replace code looking like
+ // OR dst, src, 0xFFF...FF
+ // with
+ // CONSTANT 0xFFF...FF
+ instruction->ReplaceWith(input_cst);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) {
+ Primitive::Type type = instruction->GetType();
+
+ if (!Primitive::IsIntegralType(type)) {
+ return;
+ }
+
+ HBasicBlock* block = instruction->GetBlock();
+
+ if (instruction->GetLeft()->IsConstant() &&
+ instruction->GetLeft()->AsConstant()->IsZero()) {
+ // Replace code looking like
+ // REM dst, 0, src
+ // with
+ // CONSTANT 0
+ instruction->ReplaceWith(instruction->GetLeft());
+ block->RemoveInstruction(instruction);
+ }
+
+ HConstant* cst_right = instruction->GetRight()->AsConstant();
+ if (((cst_right != nullptr) &&
+ (cst_right->IsOne() || cst_right->IsMinusOne())) ||
+ (instruction->GetLeft() == instruction->GetRight())) {
+ // Replace code looking like
+ // REM dst, src, 1
+ // or
+ // REM dst, src, -1
+ // or
+ // REM dst, src, src
+ // with
+ // CONSTANT 0
+ ArenaAllocator* allocator = GetGraph()->GetArena();
+ block->ReplaceAndRemoveInstructionWith(instruction,
+ HConstant::NewConstant(allocator, type, 0));
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitShl(HShl* instruction) {
+ VisitShift(instruction);
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitShr(HShr* instruction) {
+ VisitShift(instruction);
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitSub(HSub* instruction) {
+ Primitive::Type type = instruction->GetType();
+
+ if (!Primitive::IsIntegralType(type)) {
+ return;
+ }
+
+ HBasicBlock* block = instruction->GetBlock();
+ ArenaAllocator* allocator = GetGraph()->GetArena();
+
+ // We assume that GVN has run before, so we only perform a pointer
+ // comparison. If for some reason the values are equal but the pointers are
+ // different, we are still correct and only miss an optimisation
+ // opportunity.
+ if (instruction->GetLeft() == instruction->GetRight()) {
+ // Replace code looking like
+ // SUB dst, src, src
+ // with
+ // CONSTANT 0
+ // Note that we cannot optimise `x - x` to `0` for floating-point. It does
+ // not work when `x` is an infinity or NaN.
+ block->ReplaceAndRemoveInstructionWith(instruction,
+ HConstant::NewConstant(allocator, type, 0));
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitUShr(HUShr* instruction) {
+ VisitShift(instruction);
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitXor(HXor* instruction) {
+ if (instruction->GetLeft() == instruction->GetRight()) {
+ // Replace code looking like
+ // XOR dst, src, src
+ // with
+ // CONSTANT 0
+ Primitive::Type type = instruction->GetType();
+ HBasicBlock* block = instruction->GetBlock();
+ ArenaAllocator* allocator = GetGraph()->GetArena();
+
+ block->ReplaceAndRemoveInstructionWith(instruction,
+ HConstant::NewConstant(allocator, type, 0));
+ }
+}
+
} // namespace art
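
Taken together, the new visitors fold the following patterns when an absorbing input is present (a summary of what this file now handles, not additional rules):

  //   x * 0                         -> 0    (int/long only: Infinity * 0.0 is NaN)
  //   x & 0                         -> 0
  //   x | ~0                        -> ~0   (all bits set)
  //   0 << n, 0 >> n, 0 >>> n       -> 0
  //   0 % x, x % 1, x % -1, x % x   -> 0    (integral only; the DivZeroCheck stays separate)
  //   x - x                         -> 0    (integral only: inf - inf is NaN)
  //   x ^ x                         -> 0
  //   DivZeroCheck(c)               -> c    when c is a constant other than zero
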
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index a7f1f74e27..76b9f4fe7e 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -362,6 +362,12 @@ void SSAChecker::VisitPhi(HPhi* phi) {
Primitive::PrettyDescriptor(phi->GetType())));
}
}
+ if (phi->GetType() != HPhi::ToPhiType(phi->GetType())) {
+ AddError(StringPrintf("Phi %d in block %d does not have an expected phi type: %s",
+ phi->GetId(),
+ phi->GetBlock()->GetBlockId(),
+ Primitive::PrettyDescriptor(phi->GetType())));
+ }
}
void SSAChecker::VisitIf(HIf* instruction) {
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index ea65dc0780..74848d5d96 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -16,30 +16,12 @@
#include "gvn.h"
#include "side_effects_analysis.h"
+#include "utils.h"
-namespace art {
-
-/**
- * A node in the collision list of a ValueSet. Encodes the instruction,
- * the hash code, and the next node in the collision list.
- */
-class ValueSetNode : public ArenaObject<kArenaAllocMisc> {
- public:
- ValueSetNode(HInstruction* instruction, size_t hash_code, ValueSetNode* next)
- : instruction_(instruction), hash_code_(hash_code), next_(next) {}
+#include "utils/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
- size_t GetHashCode() const { return hash_code_; }
- HInstruction* GetInstruction() const { return instruction_; }
- ValueSetNode* GetNext() const { return next_; }
- void SetNext(ValueSetNode* node) { next_ = node; }
-
- private:
- HInstruction* const instruction_;
- const size_t hash_code_;
- ValueSetNode* next_;
-
- DISALLOW_COPY_AND_ASSIGN(ValueSetNode);
-};
+namespace art {
/**
* A ValueSet holds instructions that can replace other instructions. It is updated
@@ -52,39 +34,68 @@ class ValueSetNode : public ArenaObject<kArenaAllocMisc> {
*/
class ValueSet : public ArenaObject<kArenaAllocMisc> {
public:
+ // Constructs an empty ValueSet which owns all its buckets.
explicit ValueSet(ArenaAllocator* allocator)
- : allocator_(allocator), number_of_entries_(0), collisions_(nullptr) {
- for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
- table_[i] = nullptr;
+ : allocator_(allocator),
+ num_buckets_(kMinimumNumberOfBuckets),
+ buckets_(allocator->AllocArray<Node*>(num_buckets_)),
+ buckets_owned_(allocator, num_buckets_, false),
+ num_entries_(0) {
+ // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
+ DCHECK(IsPowerOfTwo(num_buckets_));
+ buckets_owned_.SetInitialBits(num_buckets_);
+ }
+
+ // Copy constructor. Depending on the load factor, it will either make a deep
+ // copy (all buckets owned) or a shallow one (buckets pointing to the parent).
+ ValueSet(ArenaAllocator* allocator, const ValueSet& to_copy)
+ : allocator_(allocator),
+ num_buckets_(to_copy.IdealBucketCount()),
+ buckets_(allocator->AllocArray<Node*>(num_buckets_)),
+ buckets_owned_(allocator, num_buckets_, false),
+ num_entries_(to_copy.num_entries_) {
+ // ArenaAllocator returns zeroed memory, so entries of buckets_ and
+ // buckets_owned_ are initialized to nullptr and false, respectively.
+ DCHECK(IsPowerOfTwo(num_buckets_));
+ if (num_buckets_ == to_copy.num_buckets_) {
+ // Hash table remains the same size. We copy the bucket pointers and leave
+ // all buckets_owned_ bits false.
+ memcpy(buckets_, to_copy.buckets_, num_buckets_ * sizeof(Node*));
+ } else {
+ // Hash table size changes. We copy and rehash all entries, and set all
+ // buckets_owned_ bits to true.
+ for (size_t i = 0; i < to_copy.num_buckets_; ++i) {
+ for (Node* node = to_copy.buckets_[i]; node != nullptr; node = node->GetNext()) {
+ size_t new_index = BucketIndex(node->GetHashCode());
+ buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]);
+ }
+ }
+ buckets_owned_.SetInitialBits(num_buckets_);
}
}
// Adds an instruction in the set.
void Add(HInstruction* instruction) {
DCHECK(Lookup(instruction) == nullptr);
- size_t hash_code = instruction->ComputeHashCode();
- size_t index = hash_code % kDefaultNumberOfEntries;
- if (table_[index] == nullptr) {
- table_[index] = instruction;
- } else {
- collisions_ = new (allocator_) ValueSetNode(instruction, hash_code, collisions_);
+ size_t hash_code = HashCode(instruction);
+ size_t index = BucketIndex(hash_code);
+
+ if (!buckets_owned_.IsBitSet(index)) {
+ CloneBucket(index);
}
- ++number_of_entries_;
+ buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]);
+ ++num_entries_;
}
- // If in the set, returns an equivalent instruction to the given instruction. Returns
- // null otherwise.
+ // If in the set, returns an equivalent instruction to the given instruction.
+ // Returns null otherwise.
HInstruction* Lookup(HInstruction* instruction) const {
- size_t hash_code = instruction->ComputeHashCode();
- size_t index = hash_code % kDefaultNumberOfEntries;
- HInstruction* existing = table_[index];
- if (existing != nullptr && existing->Equals(instruction)) {
- return existing;
- }
+ size_t hash_code = HashCode(instruction);
+ size_t index = BucketIndex(hash_code);
- for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
+ for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
if (node->GetHashCode() == hash_code) {
- existing = node->GetInstruction();
+ HInstruction* existing = node->GetInstruction();
if (existing->Equals(instruction)) {
return existing;
}
@@ -93,126 +104,193 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> {
return nullptr;
}
- // Returns whether `instruction` is in the set.
- HInstruction* IdentityLookup(HInstruction* instruction) const {
- size_t hash_code = instruction->ComputeHashCode();
- size_t index = hash_code % kDefaultNumberOfEntries;
- HInstruction* existing = table_[index];
- if (existing != nullptr && existing == instruction) {
- return existing;
- }
+ // Returns whether `instruction` is in the set.
+ bool Contains(HInstruction* instruction) const {
+ size_t hash_code = HashCode(instruction);
+ size_t index = BucketIndex(hash_code);
- for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
- if (node->GetHashCode() == hash_code) {
- existing = node->GetInstruction();
- if (existing == instruction) {
- return existing;
- }
+ for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
+ if (node->GetInstruction() == instruction) {
+ return true;
}
}
- return nullptr;
+ return false;
}
- // Removes all instructions in the set that are affected by the given side effects.
+ // Removes all instructions in the set affected by the given side effects.
void Kill(SideEffects side_effects) {
- for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
- HInstruction* instruction = table_[i];
- if (instruction != nullptr && instruction->GetSideEffects().DependsOn(side_effects)) {
- table_[i] = nullptr;
- --number_of_entries_;
- }
- }
+ DeleteAllImpureWhich([side_effects](Node* node) {
+ return node->GetInstruction()->GetSideEffects().DependsOn(side_effects);
+ });
+ }
- for (ValueSetNode* current = collisions_, *previous = nullptr;
- current != nullptr;
- current = current->GetNext()) {
- HInstruction* instruction = current->GetInstruction();
- if (instruction->GetSideEffects().DependsOn(side_effects)) {
- if (previous == nullptr) {
- collisions_ = current->GetNext();
- } else {
- previous->SetNext(current->GetNext());
- }
- --number_of_entries_;
- } else {
- previous = current;
- }
+ // Updates this set by intersecting with instructions in a predecessor's set.
+ void IntersectWith(ValueSet* predecessor) {
+ if (IsEmpty()) {
+ return;
+ } else if (predecessor->IsEmpty()) {
+ Clear();
+ } else {
+ // Pure instructions do not need to be tested because only impure
+ // instructions can be killed.
+ DeleteAllImpureWhich([predecessor](Node* node) {
+ return !predecessor->Contains(node->GetInstruction());
+ });
}
}
- // Returns a copy of this set.
- ValueSet* Copy() const {
- ValueSet* copy = new (allocator_) ValueSet(allocator_);
+ bool IsEmpty() const { return num_entries_ == 0; }
+ size_t GetNumberOfEntries() const { return num_entries_; }
- for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
- copy->table_[i] = table_[i];
+ private:
+ class Node : public ArenaObject<kArenaAllocMisc> {
+ public:
+ Node(HInstruction* instruction, size_t hash_code, Node* next)
+ : instruction_(instruction), hash_code_(hash_code), next_(next) {}
+
+ size_t GetHashCode() const { return hash_code_; }
+ HInstruction* GetInstruction() const { return instruction_; }
+ Node* GetNext() const { return next_; }
+ void SetNext(Node* node) { next_ = node; }
+
+ Node* Dup(ArenaAllocator* allocator, Node* new_next = nullptr) {
+ return new (allocator) Node(instruction_, hash_code_, new_next);
}
- // Note that the order will be inverted in the copy. This is fine, as the order is not
- // relevant for a ValueSet.
- for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
- copy->collisions_ = new (allocator_) ValueSetNode(
- node->GetInstruction(), node->GetHashCode(), copy->collisions_);
+ private:
+ HInstruction* const instruction_;
+ const size_t hash_code_;
+ Node* next_;
+
+ DISALLOW_COPY_AND_ASSIGN(Node);
+ };
+
+ // Creates our own copy of a bucket that is currently pointing to a parent.
+ // This algorithm can be called while iterating over the bucket because it
+ // preserves the order of entries in the bucket and will return the clone of
+ // the given 'iterator'.
+ Node* CloneBucket(size_t index, Node* iterator = nullptr) {
+ DCHECK(!buckets_owned_.IsBitSet(index));
+ Node* clone_current = nullptr;
+ Node* clone_previous = nullptr;
+ Node* clone_iterator = nullptr;
+ for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) {
+ clone_current = node->Dup(allocator_, nullptr);
+ if (node == iterator) {
+ clone_iterator = clone_current;
+ }
+ if (clone_previous == nullptr) {
+ buckets_[index] = clone_current;
+ } else {
+ clone_previous->SetNext(clone_current);
+ }
+ clone_previous = clone_current;
}
-
- copy->number_of_entries_ = number_of_entries_;
- return copy;
+ buckets_owned_.SetBit(index);
+ return clone_iterator;
}
void Clear() {
- number_of_entries_ = 0;
- collisions_ = nullptr;
- for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
- table_[i] = nullptr;
+ num_entries_ = 0;
+ for (size_t i = 0; i < num_buckets_; ++i) {
+ buckets_[i] = nullptr;
}
+ buckets_owned_.SetInitialBits(num_buckets_);
}
- // Update this `ValueSet` by intersecting with instructions in `other`.
- void IntersectionWith(ValueSet* other) {
- if (IsEmpty()) {
- return;
- } else if (other->IsEmpty()) {
- Clear();
- } else {
- for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
- if (table_[i] != nullptr && other->IdentityLookup(table_[i]) == nullptr) {
- --number_of_entries_;
- table_[i] = nullptr;
+ // Iterates over the buckets holding impure instructions (the even indices) and
+ // deletes the entries for which 'cond' returns true.
+ template<typename Functor>
+ void DeleteAllImpureWhich(Functor cond) {
+ for (size_t i = 0; i < num_buckets_; i += 2) {
+ Node* node = buckets_[i];
+ Node* previous = nullptr;
+
+ if (node == nullptr) {
+ continue;
+ }
+
+ if (!buckets_owned_.IsBitSet(i)) {
+ // The bucket is not owned, but we may not need to change it at all.
+ // Iterate as long as the entries do not satisfy 'cond'.
+ while (node != nullptr) {
+ if (cond(node)) {
+ // We do need to delete an entry but we do not own the bucket.
+ // Clone the bucket, make sure 'previous' and 'node' point to
+ // the cloned entries and break.
+ previous = CloneBucket(i, previous);
+ node = (previous == nullptr) ? buckets_[i] : previous->GetNext();
+ break;
+ }
+ previous = node;
+ node = node->GetNext();
}
}
- for (ValueSetNode* current = collisions_, *previous = nullptr;
- current != nullptr;
- current = current->GetNext()) {
- if (other->IdentityLookup(current->GetInstruction()) == nullptr) {
+
+ // By this point we either own the bucket and can start deleting entries,
+ // or we do not own it but no entries matched 'cond'.
+ DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr);
+
+ // We iterate over the remainder of entries and delete those that match
+ // the given condition.
+ while (node != nullptr) {
+ Node* next = node->GetNext();
+ if (cond(node)) {
if (previous == nullptr) {
- collisions_ = current->GetNext();
+ buckets_[i] = next;
} else {
- previous->SetNext(current->GetNext());
+ previous->SetNext(next);
}
- --number_of_entries_;
} else {
- previous = current;
+ previous = node;
}
+ node = next;
}
}
}
- bool IsEmpty() const { return number_of_entries_ == 0; }
- size_t GetNumberOfEntries() const { return number_of_entries_; }
+ // Computes a bucket count such that the load factor is reasonable.
+ // This is estimated as (num_entries_ * 1.5) and rounded up to the nearest power of two.
+ size_t IdealBucketCount() const {
+ size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1));
+ if (bucket_count > kMinimumNumberOfBuckets) {
+ return bucket_count;
+ } else {
+ return kMinimumNumberOfBuckets;
+ }
+ }
- private:
- static constexpr size_t kDefaultNumberOfEntries = 8;
+ // Generates a hash code for an instruction. Pure instructions are put into
+ // odd buckets to speed up deletion.
+ size_t HashCode(HInstruction* instruction) const {
+ size_t hash_code = instruction->ComputeHashCode();
+ if (instruction->GetSideEffects().HasDependencies()) {
+ return (hash_code << 1) | 0;
+ } else {
+ return (hash_code << 1) | 1;
+ }
+ }
+
+ // Converts a hash code to a bucket index.
+ size_t BucketIndex(size_t hash_code) const {
+ return hash_code & (num_buckets_ - 1);
+ }
ArenaAllocator* const allocator_;
+ // The internal bucket implementation of the set.
+ size_t const num_buckets_;
+ Node** const buckets_;
+
+ // Flags specifying which buckets were copied into the set from its parent.
+ // If a flag is not set, the corresponding bucket points to entries in the
+ // parent and must be cloned prior to making changes.
+ ArenaBitVector buckets_owned_;
+
// The number of entries in the set.
- size_t number_of_entries_;
+ size_t num_entries_;
- // The internal implementation of the set. It uses a combination of a hash code based
- // fixed-size list, and a linked list to handle hash code collisions.
- // TODO: Tune the fixed size list original size, and support growing it.
- ValueSetNode* collisions_;
- HInstruction* table_[kDefaultNumberOfEntries];
+ static constexpr size_t kMinimumNumberOfBuckets = 8;
DISALLOW_COPY_AND_ASSIGN(ValueSet);
};
@@ -270,11 +348,14 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
set = new (allocator_) ValueSet(allocator_);
} else {
HBasicBlock* dominator = block->GetDominator();
- set = sets_.Get(dominator->GetBlockId());
- if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) {
+ ValueSet* dominator_set = sets_.Get(dominator->GetBlockId());
+ if (dominator->GetSuccessors().Size() == 1) {
+ DCHECK_EQ(dominator->GetSuccessors().Get(0), block);
+ set = dominator_set;
+ } else {
// We have to copy if the dominator has other successors, or `block` is not a successor
// of the dominator.
- set = set->Copy();
+ set = new (allocator_) ValueSet(allocator_, *dominator_set);
}
if (!set->IsEmpty()) {
if (block->IsLoopHeader()) {
@@ -282,7 +363,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
set->Kill(side_effects_.GetLoopEffects(block));
} else if (predecessors.Size() > 1) {
for (size_t i = 0, e = predecessors.Size(); i < e; ++i) {
- set->IntersectionWith(sets_.Get(predecessors.Get(i)->GetBlockId()));
+ set->IntersectWith(sets_.Get(predecessors.Get(i)->GetBlockId()));
if (set->IsEmpty()) {
break;
}
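
Two properties of the rewritten ValueSet are worth spelling out. First, copies are copy-on-write: a same-size copy duplicates only the array of bucket pointers and leaves every buckets_owned_ bit clear, so a bucket is cloned lazily the first time the child set has to modify it. Second, the low bit of the hash encodes purity, and because the bucket count is a power of two that bit survives the masking in BucketIndex(), which is why DeleteAllImpureWhich() only has to walk the even buckets. A tiny sketch of the parity property (standalone, illustrative):

  #include <cstddef>

  size_t HashOf(size_t raw_hash, bool has_side_effect_deps) {
    return (raw_hash << 1) | (has_side_effect_deps ? 0 : 1);  // impure -> even, pure -> odd
  }

  size_t BucketOf(size_t hash, size_t num_buckets) {
    // num_buckets is a power of two, so the mask preserves the parity bit.
    return hash & (num_buckets - 1);
  }
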
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index b34957a17e..bd9267c4db 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -38,6 +38,11 @@ static constexpr int kMaxInlineCodeUnits = 100;
static constexpr int kDepthLimit = 5;
void HInliner::Run() {
+ if (graph_->IsDebuggable()) {
+ // For simplicity, we currently never inline when the graph is debuggable. This avoids
+ // having to add logic in the runtime to discover whether a method could have been inlined.
+ return;
+ }
const GrowableArray<HBasicBlock*>& blocks = graph_->GetReversePostOrder();
for (size_t i = 0; i < blocks.Size(); ++i) {
HBasicBlock* block = blocks.Get(i);
@@ -124,8 +129,8 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
resolved_method->GetAccessFlags(),
nullptr);
- HGraph* callee_graph =
- new (graph_->GetArena()) HGraph(graph_->GetArena(), graph_->GetCurrentInstructionId());
+ HGraph* callee_graph = new (graph_->GetArena()) HGraph(
+ graph_->GetArena(), graph_->IsDebuggable(), graph_->GetCurrentInstructionId());
OptimizingCompilerStats inline_stats;
HGraphBuilder builder(callee_graph,
@@ -155,15 +160,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
}
// Run simple optimizations on the graph.
- SsaRedundantPhiElimination redundant_phi(callee_graph);
- SsaDeadPhiElimination dead_phi(callee_graph);
HDeadCodeElimination dce(callee_graph);
HConstantFolding fold(callee_graph);
InstructionSimplifier simplify(callee_graph, stats_);
HOptimization* optimizations[] = {
- &redundant_phi,
- &dead_phi,
&dce,
&fold,
&simplify,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index fd99070780..2ef19b92a1 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -27,6 +27,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor {
: HGraphVisitor(graph), stats_(stats) {}
private:
+ void VisitShift(HBinaryOperation* shift);
+
void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
void VisitEqual(HEqual* equal) OVERRIDE;
void VisitArraySet(HArraySet* equal) OVERRIDE;
@@ -34,6 +36,16 @@ class InstructionSimplifierVisitor : public HGraphVisitor {
void VisitNullCheck(HNullCheck* instruction) OVERRIDE;
void VisitArrayLength(HArrayLength* instruction) OVERRIDE;
void VisitCheckCast(HCheckCast* instruction) OVERRIDE;
+ void VisitAdd(HAdd* instruction) OVERRIDE;
+ void VisitAnd(HAnd* instruction) OVERRIDE;
+ void VisitDiv(HDiv* instruction) OVERRIDE;
+ void VisitMul(HMul* instruction) OVERRIDE;
+ void VisitOr(HOr* instruction) OVERRIDE;
+ void VisitShl(HShl* instruction) OVERRIDE;
+ void VisitShr(HShr* instruction) OVERRIDE;
+ void VisitSub(HSub* instruction) OVERRIDE;
+ void VisitUShr(HUShr* instruction) OVERRIDE;
+ void VisitXor(HXor* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
@@ -43,6 +55,29 @@ void InstructionSimplifier::Run() {
visitor.VisitInsertionOrder();
}
+namespace {
+
+bool AreAllBitsSet(HConstant* constant) {
+ return Int64FromConstant(constant) == -1;
+}
+
+} // namespace
+
+void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
+ HConstant* input_cst = instruction->GetConstantRight();
+ HInstruction* input_other = instruction->GetLeastConstantLeft();
+
+ if ((input_cst != nullptr) && input_cst->IsZero()) {
+ // Replace code looking like
+ // SHL dst, src, 0
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) {
HInstruction* obj = null_check->InputAt(0);
if (!obj->CanBeNull()) {
@@ -137,4 +172,234 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct
}
}
+void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ HInstruction* input_other = instruction->GetLeastConstantLeft();
+ if ((input_cst != nullptr) && input_cst->IsZero()) {
+ // Replace code looking like
+ // ADD dst, src, 0
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ HInstruction* input_other = instruction->GetLeastConstantLeft();
+
+ if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) {
+ // Replace code looking like
+ // AND dst, src, 0xFFF...FF
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ // We assume that GVN has run before, so we only perform a pointer comparison.
+ // If for some reason the values are equal but the pointers are different, we
+ // are still correct and only miss an optimisation opportunity.
+ if (instruction->GetLeft() == instruction->GetRight()) {
+ // Replace code looking like
+ // AND dst, src, src
+ // with
+ // src
+ instruction->ReplaceWith(instruction->GetLeft());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ HInstruction* input_other = instruction->GetLeastConstantLeft();
+ Primitive::Type type = instruction->GetType();
+
+ if ((input_cst != nullptr) && input_cst->IsOne()) {
+ // Replace code looking like
+ // DIV dst, src, 1
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ if ((input_cst != nullptr) && input_cst->IsMinusOne() &&
+ (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) {
+ // Replace code looking like
+ // DIV dst, src, -1
+ // with
+ // NEG dst, src
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
+ instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other)));
+ }
+}
+
+void InstructionSimplifierVisitor::VisitMul(HMul* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ HInstruction* input_other = instruction->GetLeastConstantLeft();
+ Primitive::Type type = instruction->GetType();
+ HBasicBlock* block = instruction->GetBlock();
+ ArenaAllocator* allocator = GetGraph()->GetArena();
+
+ if (input_cst == nullptr) {
+ return;
+ }
+
+ if (input_cst->IsOne()) {
+ // Replace code looking like
+ // MUL dst, src, 1
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ if (input_cst->IsMinusOne() &&
+ (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) {
+ // Replace code looking like
+ // MUL dst, src, -1
+ // with
+ // NEG dst, src
+ HNeg* neg = new (allocator) HNeg(type, input_other);
+ block->ReplaceAndRemoveInstructionWith(instruction, neg);
+ return;
+ }
+
+ if (Primitive::IsFloatingPointType(type) &&
+ ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->GetValue() == 2.0f) ||
+ (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->GetValue() == 2.0))) {
+ // Replace code looking like
+ // FP_MUL dst, src, 2.0
+ // with
+ // FP_ADD dst, src, src
+ // The 'int' and 'long' cases are handled below.
+ block->ReplaceAndRemoveInstructionWith(instruction,
+ new (allocator) HAdd(type, input_other, input_other));
+ return;
+ }
+
+ if (Primitive::IsIntOrLongType(type)) {
+ int64_t factor = Int64FromConstant(input_cst);
+ // We expect the `0` case to have been handled in the constant folding pass.
+ DCHECK_NE(factor, 0);
+ if (IsPowerOfTwo(factor)) {
+ // Replace code looking like
+ // MUL dst, src, pow_of_2
+ // with
+ // SHL dst, src, log2(pow_of_2)
+ HIntConstant* shift = new (allocator) HIntConstant(WhichPowerOf2(factor));
+ block->InsertInstructionBefore(shift, instruction);
+ HShl* shl = new (allocator) HShl(type, input_other, shift);
+ block->ReplaceAndRemoveInstructionWith(instruction, shl);
+ }
+ }
+}
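
For the power-of-two branch just above, the rewrite for, say, `x * 8` looks like this at the IR level (illustrative):

  //   factor == 8, WhichPowerOf2(8) == 3
  //   MUL dst, x, 8   ==>   SHL dst, x, 3
  // The shift amount is materialised as a new HIntConstant inserted before the original
  // multiply, and the multiply itself is replaced by the HShl.
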
+
+void InstructionSimplifierVisitor::VisitOr(HOr* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ HInstruction* input_other = instruction->GetLeastConstantLeft();
+
+ if ((input_cst != nullptr) && input_cst->IsZero()) {
+ // Replace code looking like
+ // OR dst, src, 0
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ // We assume that GVN has run before, so we only perform a pointer comparison.
+ // If for some reason the values are equal but the pointers are different, we
+ // are still correct and only miss an optimisation opportunity.
+ if (instruction->GetLeft() == instruction->GetRight()) {
+ // Replace code looking like
+ // OR dst, src, src
+ // with
+ // src
+ instruction->ReplaceWith(instruction->GetLeft());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionSimplifierVisitor::VisitShl(HShl* instruction) {
+ VisitShift(instruction);
+}
+
+void InstructionSimplifierVisitor::VisitShr(HShr* instruction) {
+ VisitShift(instruction);
+}
+
+void InstructionSimplifierVisitor::VisitSub(HSub* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ HInstruction* input_other = instruction->GetLeastConstantLeft();
+
+ if ((input_cst != nullptr) && input_cst->IsZero()) {
+ // Replace code looking like
+ // SUB dst, src, 0
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ Primitive::Type type = instruction->GetType();
+ if (!Primitive::IsIntegralType(type)) {
+ return;
+ }
+
+ HBasicBlock* block = instruction->GetBlock();
+ ArenaAllocator* allocator = GetGraph()->GetArena();
+
+ if (instruction->GetLeft()->IsConstant()) {
+ int64_t left = Int64FromConstant(instruction->GetLeft()->AsConstant());
+ if (left == 0) {
+ // Replace code looking like
+ // SUB dst, 0, src
+ // with
+ // NEG dst, src
+ // Note that we cannot optimise `0.0 - x` to `-x` for floating-point. When
+ // `x` is `0.0`, the former expression yields `0.0`, while the latter
+ // yields `-0.0`.
+ HNeg* neg = new (allocator) HNeg(type, instruction->GetRight());
+ block->ReplaceAndRemoveInstructionWith(instruction, neg);
+ }
+ }
+}
+
+void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) {
+ VisitShift(instruction);
+}
+
+void InstructionSimplifierVisitor::VisitXor(HXor* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ HInstruction* input_other = instruction->GetLeastConstantLeft();
+
+ if ((input_cst != nullptr) && input_cst->IsZero()) {
+ // Replace code looking like
+ // XOR dst, src, 0
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) {
+ // Replace code looking like
+ // XOR dst, src, 0xFFF...FF
+ // with
+ // NOT dst, src
+ HNot* bitwise_not = new (GetGraph()->GetArena()) HNot(instruction->GetType(), input_other);
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, bitwise_not);
+ return;
+ }
+}
+
} // namespace art
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index a82d80af13..0c9eb94172 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -114,12 +114,13 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM {
CodeGeneratorARM* codegen = down_cast<CodeGeneratorARM*>(codegen_in);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(invoke_->GetLocations());
+ SaveLiveRegisters(codegen, invoke_->GetLocations());
MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
+ RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
} else {
UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
UNREACHABLE();
@@ -133,7 +134,7 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM {
MoveFromReturnRegister(out, invoke_->GetType(), codegen);
}
- codegen->RestoreLiveRegisters(invoke_->GetLocations());
+ RestoreLiveRegisters(codegen, invoke_->GetLocations());
__ b(GetExitLabel());
}
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 1ddff8a125..19b04ae094 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -122,12 +122,13 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(invoke_->GetLocations());
+ SaveLiveRegisters(codegen, invoke_->GetLocations());
MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
+ RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
} else {
UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
UNREACHABLE();
@@ -141,7 +142,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
MoveFromReturnRegister(out, invoke_->GetType(), codegen);
}
- codegen->RestoreLiveRegisters(invoke_->GetLocations());
+ RestoreLiveRegisters(codegen, invoke_->GetLocations());
__ B(GetExitLabel());
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index c73f092a61..2064b18138 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -134,12 +134,13 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
__ Bind(GetEntryLabel());
- codegen->SaveLiveRegisters(invoke_->GetLocations());
+ SaveLiveRegisters(codegen, invoke_->GetLocations());
MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
+ RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
} else {
UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
UNREACHABLE();
@@ -153,7 +154,7 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
MoveFromReturnRegister(out, invoke_->GetType(), codegen);
}
- codegen->RestoreLiveRegisters(invoke_->GetLocations());
+ RestoreLiveRegisters(codegen, invoke_->GetLocations());
__ jmp(GetExitLabel());
}
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 907eff162f..0b0cfde0cf 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -388,44 +388,44 @@ TEST(LivenessTest, Loop5) {
// Make sure we create a preheader of a loop when a header originally has two
// incoming blocks and one back edge.
// Bitsets are made of:
- // (constant0, constant4, constant5, phi in block 8, phi in block 4)
+ // (constant0, constant4, constant5, phi in block 8)
const char* expected =
"Block 0\n"
- " live in: (00000)\n"
- " live out: (11100)\n"
- " kill: (11100)\n"
+ " live in: (0000)\n"
+ " live out: (1110)\n"
+ " kill: (1110)\n"
"Block 1\n"
- " live in: (11100)\n"
- " live out: (01100)\n"
- " kill: (00000)\n"
+ " live in: (1110)\n"
+ " live out: (0110)\n"
+ " kill: (0000)\n"
"Block 2\n"
- " live in: (01000)\n"
- " live out: (00000)\n"
- " kill: (00000)\n"
+ " live in: (0100)\n"
+ " live out: (0000)\n"
+ " kill: (0000)\n"
"Block 3\n"
- " live in: (00100)\n"
- " live out: (00000)\n"
- " kill: (00000)\n"
+ " live in: (0010)\n"
+ " live out: (0000)\n"
+ " kill: (0000)\n"
"Block 4\n" // loop header
- " live in: (00000)\n"
- " live out: (00001)\n"
- " kill: (00001)\n"
+ " live in: (0001)\n"
+ " live out: (0001)\n"
+ " kill: (0000)\n"
"Block 5\n" // back edge
- " live in: (00001)\n"
- " live out: (00000)\n"
- " kill: (00000)\n"
+ " live in: (0001)\n"
+ " live out: (0001)\n"
+ " kill: (0000)\n"
"Block 6\n" // return block
- " live in: (00001)\n"
- " live out: (00000)\n"
- " kill: (00000)\n"
+ " live in: (0001)\n"
+ " live out: (0000)\n"
+ " kill: (0000)\n"
"Block 7\n" // exit block
- " live in: (00000)\n"
- " live out: (00000)\n"
- " kill: (00000)\n"
+ " live in: (0000)\n"
+ " live out: (0000)\n"
+ " kill: (0000)\n"
"Block 8\n" // synthesized pre header
- " live in: (00000)\n"
- " live out: (00000)\n"
- " kill: (00010)\n";
+ " live in: (0000)\n"
+ " live out: (0001)\n"
+ " kill: (0001)\n";
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 198cc15cce..566c0daf1e 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -211,15 +211,25 @@ class Location : public ValueObject {
}
Location ToLow() const {
- return IsRegisterPair()
- ? Location::RegisterLocation(low())
- : Location::FpuRegisterLocation(low());
+ if (IsRegisterPair()) {
+ return Location::RegisterLocation(low());
+ } else if (IsFpuRegisterPair()) {
+ return Location::FpuRegisterLocation(low());
+ } else {
+ DCHECK(IsDoubleStackSlot());
+ return Location::StackSlot(GetStackIndex());
+ }
}
Location ToHigh() const {
- return IsRegisterPair()
- ? Location::RegisterLocation(high())
- : Location::FpuRegisterLocation(high());
+ if (IsRegisterPair()) {
+ return Location::RegisterLocation(high());
+ } else if (IsFpuRegisterPair()) {
+ return Location::FpuRegisterLocation(high());
+ } else {
+ DCHECK(IsDoubleStackSlot());
+ return Location::StackSlot(GetHighStackIndex(4));
+ }
}
static uintptr_t EncodeStackIndex(intptr_t stack_index) {
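
With the new branch, a 64-bit stack location splits into two word-sized slots. Assuming GetHighStackIndex(4) yields the low index plus one 4-byte word, the behaviour is roughly (illustrative):

  //   Location loc = Location::DoubleStackSlot(16);
  //   loc.ToLow()   -> Location::StackSlot(16)
  //   loc.ToHigh()  -> Location::StackSlot(20)
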
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e51bbc330a..a90ebced69 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -673,10 +673,43 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const {
return nullptr;
}
+HConstant* HBinaryOperation::GetConstantRight() const {
+ if (GetRight()->IsConstant()) {
+ return GetRight()->AsConstant();
+ } else if (IsCommutative() && GetLeft()->IsConstant()) {
+ return GetLeft()->AsConstant();
+ } else {
+ return nullptr;
+ }
+}
+
+// If `GetConstantRight()` returns one of the inputs, this returns the other
+// one. Otherwise it returns nullptr.
+HInstruction* HBinaryOperation::GetLeastConstantLeft() const {
+ HInstruction* most_constant_right = GetConstantRight();
+ if (most_constant_right == nullptr) {
+ return nullptr;
+ } else if (most_constant_right == GetLeft()) {
+ return GetRight();
+ } else {
+ return GetLeft();
+ }
+}
+
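
Behaviour of the two helpers above on a few shapes of binary operation (illustrative):

  //   HAdd(x, IntConstant(7)) : GetConstantRight() == 7,      GetLeastConstantLeft() == x
  //   HAdd(IntConstant(7), x) : same, because HAdd is commutative
  //   HSub(IntConstant(7), x) : GetConstantRight() == nullptr (HSub is not commutative and
  //                             its right input is not a constant)
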
bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const {
return this == if_->GetPreviousDisregardingMoves();
}
+HConstant* HConstant::NewConstant(ArenaAllocator* allocator, Primitive::Type type, int64_t val) {
+ if (type == Primitive::kPrimInt) {
+ DCHECK(IsInt<32>(val));
+ return new (allocator) HIntConstant(val);
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimLong);
+ return new (allocator) HLongConstant(val);
+ }
+}
+
bool HInstruction::Equals(HInstruction* other) const {
if (!InstructionTypeEquals(other)) return false;
DCHECK_EQ(GetKind(), other->GetKind());
@@ -907,7 +940,8 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
} else {
if (!returns_void) {
// There will be multiple returns.
- return_value = new (allocator) HPhi(allocator, kNoRegNumber, 0, invoke->GetType());
+ return_value = new (allocator) HPhi(
+ allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke->GetType()));
to->AddPhi(return_value->AsPhi());
}
for (size_t i = 0, e = to->GetPredecessors().Size(); i < e; ++i) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8b56166610..a35fa1d8c3 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -103,7 +103,7 @@ class HInstructionList {
// Control-flow graph of a method. Contains a list of basic blocks.
class HGraph : public ArenaObject<kArenaAllocMisc> {
public:
- HGraph(ArenaAllocator* arena, int start_instruction_id = 0)
+ HGraph(ArenaAllocator* arena, bool debuggable = false, int start_instruction_id = 0)
: arena_(arena),
blocks_(arena, kDefaultNumberOfBlocks),
reverse_post_order_(arena, kDefaultNumberOfBlocks),
@@ -114,6 +114,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
number_of_in_vregs_(0),
temporaries_vreg_slots_(0),
has_array_accesses_(false),
+ debuggable_(debuggable),
current_instruction_id_(start_instruction_id) {}
ArenaAllocator* GetArena() const { return arena_; }
@@ -132,8 +133,13 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
// recognition. Returns whether it was successful in doing all these steps.
bool TryBuildingSsa() {
BuildDominatorTree();
+ // The SSA builder requires all loops to be natural. Specifically, the dead phi
+ // elimination phase checks the consistency of the graph when doing a post-order
+ // visit for eliminating dead phis: a dead phi can only have loop header phi
+ // users remaining when being visited.
+ if (!AnalyzeNaturalLoops()) return false;
TransformToSsa();
- return AnalyzeNaturalLoops();
+ return true;
}
void BuildDominatorTree();
@@ -208,6 +214,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
has_array_accesses_ = value;
}
+ bool IsDebuggable() const { return debuggable_; }
+
HNullConstant* GetNullConstant();
private:
@@ -248,6 +256,11 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
// Has array accesses. We can totally skip BCE if it's false.
bool has_array_accesses_;
+ // Indicates whether the graph should be compiled in a way that
+ // ensures full debuggability. If false, we can apply more
+ // aggressive optimizations that may limit the level of debugging.
+ const bool debuggable_;
+
// The current id to assign to a newly added instruction. See HInstruction.id_.
int32_t current_instruction_id_;
@@ -1096,6 +1109,7 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); }
bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); }
+ bool HasNonEnvironmentUses() const { return !uses_.IsEmpty(); }
// Does this instruction strictly dominate `other_instruction`?
// Returns false if this instruction and `other_instruction` are the same.
@@ -1561,6 +1575,14 @@ class HBinaryOperation : public HExpression<2> {
virtual int32_t Evaluate(int32_t x, int32_t y) const = 0;
virtual int64_t Evaluate(int64_t x, int64_t y) const = 0;
+ // Returns an input that can legally be used as the right input and is
+ // constant, or nullptr.
+ HConstant* GetConstantRight() const;
+
+ // If `GetConstantRight()` returns one of the inputs, this returns the other
+ // one. Otherwise it returns nullptr.
+ HInstruction* GetLeastConstantLeft() const;
+
DECLARE_INSTRUCTION(BinaryOperation);
private:
@@ -1832,6 +1854,12 @@ class HConstant : public HExpression<0> {
bool CanBeMoved() const OVERRIDE { return true; }
+ virtual bool IsMinusOne() const { return false; }
+ virtual bool IsZero() const { return false; }
+ virtual bool IsOne() const { return false; }
+
+ static HConstant* NewConstant(ArenaAllocator* allocator, Primitive::Type type, int64_t val);
+
DECLARE_INSTRUCTION(Constant);
private:
@@ -1851,6 +1879,16 @@ class HFloatConstant : public HConstant {
size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+ bool IsMinusOne() const OVERRIDE {
+ return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>((-1.0f));
+ }
+ bool IsZero() const OVERRIDE {
+ return AsFloatConstant()->GetValue() == 0.0f;
+ }
+ bool IsOne() const OVERRIDE {
+ return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>(1.0f);
+ }
+
DECLARE_INSTRUCTION(FloatConstant);
private:
@@ -1872,6 +1910,16 @@ class HDoubleConstant : public HConstant {
size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+ bool IsMinusOne() const OVERRIDE {
+ return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>((-1.0));
+ }
+ bool IsZero() const OVERRIDE {
+ return AsDoubleConstant()->GetValue() == 0.0;
+ }
+ bool IsOne() const OVERRIDE {
+ return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>(1.0);
+ }
+
DECLARE_INSTRUCTION(DoubleConstant);
private:
@@ -1917,6 +1965,10 @@ class HIntConstant : public HConstant {
// method is an workaround until we fix the above.
bool ActAsNullConstant() const OVERRIDE { return value_ == 0; }
+ bool IsMinusOne() const OVERRIDE { return GetValue() == -1; }
+ bool IsZero() const OVERRIDE { return GetValue() == 0; }
+ bool IsOne() const OVERRIDE { return GetValue() == 1; }
+
DECLARE_INSTRUCTION(IntConstant);
private:
@@ -1937,6 +1989,10 @@ class HLongConstant : public HConstant {
size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+ bool IsMinusOne() const OVERRIDE { return GetValue() == -1; }
+ bool IsZero() const OVERRIDE { return GetValue() == 0; }
+ bool IsOne() const OVERRIDE { return GetValue() == 1; }
+
DECLARE_INSTRUCTION(LongConstant);
private:
@@ -2498,6 +2554,19 @@ class HPhi : public HInstruction {
inputs_.SetSize(number_of_inputs);
}
+ // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
+ static Primitive::Type ToPhiType(Primitive::Type type) {
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ return Primitive::kPrimInt;
+ default:
+ return type;
+ }
+ }
+
size_t InputCount() const OVERRIDE { return inputs_.Size(); }
void AddInput(HInstruction* input);
@@ -3289,8 +3358,19 @@ class HParallelMove : public HTemplateInstruction<0> {
if (kIsDebugBuild) {
if (instruction != nullptr) {
for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
- DCHECK_NE(moves_.Get(i).GetInstruction(), instruction)
- << "Doing parallel moves for the same instruction.";
+ if (moves_.Get(i).GetInstruction() == instruction) {
+ // Special case the situation where the move is for the spill slot
+ // of the instruction.
+ if ((GetPrevious() == instruction)
+ || ((GetPrevious() == nullptr)
+ && instruction->IsPhi()
+ && instruction->GetBlock() == GetBlock())) {
+ DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind())
+ << "Doing parallel moves for the same instruction.";
+ } else {
+ DCHECK(false) << "Doing parallel moves for the same instruction.";
+ }
+ }
}
}
for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
@@ -3441,6 +3521,12 @@ class HBlocksInLoopIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator);
};
+inline int64_t Int64FromConstant(HConstant* constant) {
+ DCHECK(constant->IsIntConstant() || constant->IsLongConstant());
+ return constant->IsIntConstant() ? constant->AsIntConstant()->GetValue()
+ : constant->AsLongConstant()->GetValue();
+}
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_H_
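
HPhi::ToPhiType above widens sub-int Dex types so that phi inputs agree on a representation. A self-contained sketch of the same mapping, with a stand-in enum instead of Primitive::Type:

// Illustrative only; the enum stands in for art::Primitive::Type.
#include <cstdio>

enum class PrimType { kBoolean, kByte, kShort, kChar, kInt, kLong, kFloat, kDouble, kNot };

// Phis only carry int-or-wider types: every sub-int type collapses to int.
PrimType ToPhiType(PrimType type) {
  switch (type) {
    case PrimType::kBoolean:
    case PrimType::kByte:
    case PrimType::kShort:
    case PrimType::kChar:
      return PrimType::kInt;
    default:
      return type;
  }
}

int main() {
  std::printf("boolean widens to int: %d\n", ToPhiType(PrimType::kBoolean) == PrimType::kInt);
  std::printf("reference unchanged:   %d\n", ToPhiType(PrimType::kNot) == PrimType::kNot);
}
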
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index eb984248a9..b70f9252ae 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -173,24 +173,40 @@ class OptimizingCompiler FINAL : public Compiler {
jobject class_loader,
const DexFile& dex_file) const OVERRIDE;
+ CompiledMethod* TryCompile(const DexFile::CodeItem* code_item,
+ uint32_t access_flags,
+ InvokeType invoke_type,
+ uint16_t class_def_idx,
+ uint32_t method_idx,
+ jobject class_loader,
+ const DexFile& dex_file) const;
+
CompiledMethod* JniCompile(uint32_t access_flags,
uint32_t method_idx,
- const DexFile& dex_file) const OVERRIDE;
+ const DexFile& dex_file) const OVERRIDE {
+ return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
+ }
uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
+ InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
+ }
bool WriteElf(art::File* file,
OatWriter* oat_writer,
const std::vector<const art::DexFile*>& dex_files,
const std::string& android_root,
- bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
+ *GetCompilerDriver());
+ }
- void InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const OVERRIDE {}
+ void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
void Init() OVERRIDE;
- void UnInit() const OVERRIDE {}
+ void UnInit() const OVERRIDE;
private:
// Whether we should run any optimization or register allocation. If false, will
@@ -214,6 +230,9 @@ class OptimizingCompiler FINAL : public Compiler {
std::unique_ptr<std::ostream> visualizer_output_;
+ // Delegate to Quick in case the optimizing compiler cannot compile a method.
+ std::unique_ptr<Compiler> delegate_;
+
DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler);
};
@@ -222,10 +241,13 @@ static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
: Compiler(driver, kMaximumCompilationTimeBeforeWarning),
run_optimizations_(
- driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime),
- compilation_stats_() {}
+ (driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime)
+ && !driver->GetCompilerOptions().GetDebuggable()),
+ compilation_stats_(),
+ delegate_(Create(driver, Compiler::Kind::kQuick)) {}
void OptimizingCompiler::Init() {
+ delegate_->Init();
// Enable C1visualizer output. Must be done in Init() because the compiler
// driver is not fully initialized when passed to the compiler's constructor.
CompilerDriver* driver = GetCompilerDriver();
@@ -238,34 +260,24 @@ void OptimizingCompiler::Init() {
}
}
+void OptimizingCompiler::UnInit() const {
+ delegate_->UnInit();
+}
+
OptimizingCompiler::~OptimizingCompiler() {
compilation_stats_.Log();
}
+void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const {
+ delegate_->InitCompilationUnit(cu);
+}
+
bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED,
const DexFile& dex_file ATTRIBUTE_UNUSED,
CompilationUnit* cu ATTRIBUTE_UNUSED) const {
return true;
}
-CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
- uint32_t method_idx,
- const DexFile& dex_file) const {
- return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
-}
-
-uintptr_t OptimizingCompiler::GetEntryPointOf(mirror::ArtMethod* method) const {
- return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
- InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
-}
-
-bool OptimizingCompiler::WriteElf(art::File* file, OatWriter* oat_writer,
- const std::vector<const art::DexFile*>& dex_files,
- const std::string& android_root, bool is_host) const {
- return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
-}
-
static bool IsInstructionSetSupported(InstructionSet instruction_set) {
return instruction_set == kArm64
|| (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat)
@@ -298,8 +310,6 @@ static void RunOptimizations(HGraph* graph,
const DexCompilationUnit& dex_compilation_unit,
PassInfoPrinter* pass_info_printer,
StackHandleScopeCollection* handles) {
- SsaRedundantPhiElimination redundant_phi(graph);
- SsaDeadPhiElimination dead_phi(graph);
HDeadCodeElimination dce(graph);
HConstantFolding fold1(graph);
InstructionSimplifier simplify1(graph, stats);
@@ -317,8 +327,6 @@ static void RunOptimizations(HGraph* graph,
IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver);
HOptimization* optimizations[] = {
- &redundant_phi,
- &dead_phi,
&intrinsics,
&dce,
&fold1,
@@ -425,13 +433,13 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
ArrayRef<const uint8_t>());
}
-CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
- uint32_t access_flags,
- InvokeType invoke_type,
- uint16_t class_def_idx,
- uint32_t method_idx,
- jobject class_loader,
- const DexFile& dex_file) const {
+CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
+ uint32_t access_flags,
+ InvokeType invoke_type,
+ uint16_t class_def_idx,
+ uint32_t method_idx,
+ jobject class_loader,
+ const DexFile& dex_file) const {
UNUSED(invoke_type);
std::string method_name = PrettyMethod(method_idx, dex_file);
compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation);
@@ -461,12 +469,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
ArenaPool pool;
ArenaAllocator arena(&pool);
- HGraph* graph = new (&arena) HGraph(&arena);
+ HGraph* graph = new (&arena) HGraph(
+ &arena, compiler_driver->GetCompilerOptions().GetDebuggable());
// For testing purposes, we put a special marker on method names that should be compiled
// with this compiler. This makes sure we're not regressing.
bool shouldCompile = method_name.find("$opt$") != std::string::npos;
- bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos;
+ bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_;
std::unique_ptr<CodeGenerator> codegen(
CodeGenerator::Create(graph,
@@ -504,6 +513,11 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
bool can_optimize = CanOptimize(*code_item);
bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set);
+
+ // `run_optimizations_` is only false when it was disabled explicitly (either through
+ // a compiler filter or the debuggable flag). In that case we can still run baseline.
+ // Otherwise, if the optimizing path bails out, we fall back to Quick.
+ bool can_use_baseline = !run_optimizations_;
if (run_optimizations_ && can_optimize && can_allocate_registers) {
VLOG(compiler) << "Optimizing " << method_name;
@@ -523,10 +537,10 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
dex_file,
dex_compilation_unit,
&pass_info_printer);
- } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) {
+ } else if (shouldOptimize && can_allocate_registers) {
LOG(FATAL) << "Could not allocate registers in optimizing compiler";
UNREACHABLE();
- } else {
+ } else if (can_use_baseline) {
VLOG(compiler) << "Compile baseline " << method_name;
if (!run_optimizations_) {
@@ -538,7 +552,25 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
}
return CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit);
+ } else {
+ return nullptr;
+ }
+}
+
+CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
+ uint32_t access_flags,
+ InvokeType invoke_type,
+ uint16_t class_def_idx,
+ uint32_t method_idx,
+ jobject class_loader,
+ const DexFile& dex_file) const {
+ CompiledMethod* method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
+ method_idx, class_loader, dex_file);
+ if (method != nullptr) {
+ return method;
}
+ return delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx,
+ class_loader, dex_file);
}
Compiler* CreateOptimizingCompiler(CompilerDriver* driver) {
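
The Compile()/TryCompile() split above lets the optimizing backend bail out and hand the method to a Quick delegate. A minimal sketch of that compile-or-delegate flow, with illustrative types and a made-up bail-out condition standing in for the real checks:

// Illustrative only; models the "try optimizing, else delegate to Quick" flow.
#include <cstdio>
#include <memory>

struct Backend {
  virtual ~Backend() = default;
  virtual const char* Compile(const char* name) = 0;
};

struct QuickLike : Backend {
  const char* Compile(const char* /*name*/) override { return "compiled by Quick"; }
};

struct OptimizingLike : Backend {
  std::unique_ptr<Backend> delegate_ = std::make_unique<QuickLike>();

  // Returns nullptr when this backend cannot handle the method.
  const char* TryCompile(const char* name) {
    bool supported = name[0] != '$';  // stand-in for the real bail-out conditions
    return supported ? "compiled by optimizing" : nullptr;
  }

  const char* Compile(const char* name) override {
    if (const char* result = TryCompile(name)) return result;
    return delegate_->Compile(name);  // fall back to the delegate backend
  }
};

int main() {
  OptimizingLike compiler;
  std::printf("foo          -> %s\n", compiler.Compile("foo"));
  std::printf("$unsupported -> %s\n", compiler.Compile("$unsupported"));
}
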
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index fe23fcf326..c20c8a172d 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -33,7 +33,7 @@ static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_
// to merge with a void type, we should use the existing one.
return new_type == Primitive::kPrimVoid
? existing
- : new_type;
+ : HPhi::ToPhiType(new_type);
}
}
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 748ab2259e..cecc210cbf 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -16,6 +16,7 @@
#include "register_allocator.h"
+#include <iostream>
#include <sstream>
#include "base/bit_vector-inl.h"
@@ -32,6 +33,9 @@ static constexpr size_t kDefaultNumberOfSpillSlots = 4;
// allocate SRegister.
static int GetHighForLowRegister(int reg) { return reg + 1; }
static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
+static bool IsLowOfUnalignedPairInterval(LiveInterval* low) {
+ return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister();
+}
RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
CodeGenerator* codegen,
@@ -70,28 +74,13 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
}
-bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph,
+bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
InstructionSet instruction_set) {
- if (!Supports(instruction_set)) {
- return false;
- }
- if (instruction_set == kArm64
+ return instruction_set == kArm64
|| instruction_set == kX86_64
|| instruction_set == kArm
- || instruction_set == kThumb2) {
- return true;
- }
- for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) {
- for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions());
- !it.Done();
- it.Advance()) {
- HInstruction* current = it.Current();
- if (instruction_set == kX86 && current->GetType() == Primitive::kPrimLong) {
- return false;
- }
- }
- }
- return true;
+ || instruction_set == kX86
+ || instruction_set == kThumb2;
}
static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
@@ -771,8 +760,15 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
return false;
}
- if (current->IsLowInterval() && free_until[GetHighForLowRegister(reg)] == 0) {
- return false;
+ if (current->IsLowInterval()) {
+ // If the high register of this interval is not available, we need to spill.
+ int high_reg = current->GetHighInterval()->GetRegister();
+ if (high_reg == kNoRegister) {
+ high_reg = GetHighForLowRegister(reg);
+ }
+ if (free_until[high_reg] == 0) {
+ return false;
+ }
}
current->SetRegister(reg);
@@ -831,16 +827,18 @@ int RegisterAllocator::FindAvailableRegister(size_t* next_use) const {
return reg;
}
-bool RegisterAllocator::TrySplitNonPairIntervalAt(size_t position,
- size_t first_register_use,
- size_t* next_use) {
+bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+ size_t first_register_use,
+ size_t* next_use) {
for (size_t i = 0, e = active_.Size(); i < e; ++i) {
LiveInterval* active = active_.Get(i);
DCHECK(active->HasRegister());
+ if (active->IsFixed()) continue;
+ if (active->IsHighInterval()) continue;
+ if (first_register_use > next_use[active->GetRegister()]) continue;
+
// Split the first interval found.
- if (first_register_use <= next_use[active->GetRegister()]
- && !active->IsLowInterval()
- && !active->IsHighInterval()) {
+ if (!active->IsLowInterval() || IsLowOfUnalignedPairInterval(active)) {
LiveInterval* split = Split(active, position);
active_.DeleteAt(i);
if (split != active) {
@@ -921,7 +919,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
// When allocating the low part, we made sure the high register was available.
DCHECK_LT(first_register_use, next_use[reg]);
} else if (current->IsLowInterval()) {
- reg = FindAvailableRegisterPair(next_use, current->GetStart());
+ reg = FindAvailableRegisterPair(next_use, first_register_use);
// We should spill if both registers are not available.
should_spill = (first_register_use >= next_use[reg])
|| (first_register_use >= next_use[GetHighForLowRegister(reg)]);
@@ -934,14 +932,17 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
DCHECK_NE(reg, kNoRegister);
if (should_spill) {
DCHECK(!current->IsHighInterval());
- bool is_allocation_at_use_site = (current->GetStart() == (first_register_use - 1));
+ bool is_allocation_at_use_site = (current->GetStart() >= (first_register_use - 1));
if (current->IsLowInterval()
&& is_allocation_at_use_site
- && TrySplitNonPairIntervalAt(current->GetStart(), first_register_use, next_use)) {
+ && TrySplitNonPairOrUnalignedPairIntervalAt(current->GetStart(),
+ first_register_use,
+ next_use)) {
// If we're allocating a register for `current` because the instruction at
// that position requires it, but we think we should spill, then there are
- // non-pair intervals blocking the allocation. We split the first
- // interval found, and put ourselves first in the `unhandled_` list.
+ // non-pair intervals or unaligned pair intervals blocking the allocation.
+ // We split the first interval found, and put ourselves first in the
+ // `unhandled_` list.
LiveInterval* existing = unhandled_->Peek();
DCHECK(existing->IsHighInterval());
DCHECK_EQ(existing->GetLowInterval(), current);
@@ -951,10 +952,15 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
// register, we split this interval just before its first register use.
AllocateSpillSlotFor(current);
LiveInterval* split = Split(current, first_register_use - 1);
- DCHECK_NE(current, split) << "There is not enough registers available for "
- << split->GetParent()->GetDefinedBy()->DebugName() << " "
- << split->GetParent()->GetDefinedBy()->GetId()
- << " at " << first_register_use - 1;
+ if (current == split) {
+ DumpInterval(std::cerr, current);
+ DumpAllIntervals(std::cerr);
+ // This situation has the potential to infinite loop, so we make it a non-debug CHECK.
+ CHECK(false) << "There are not enough registers available for "
+ << split->GetParent()->GetDefinedBy()->DebugName() << " "
+ << split->GetParent()->GetDefinedBy()->GetId()
+ << " at " << first_register_use - 1;
+ }
AddSorted(unhandled_, split);
}
return false;
@@ -1203,7 +1209,24 @@ static bool IsValidDestination(Location destination) {
|| destination.IsDoubleStackSlot();
}
-void RegisterAllocator::AddInputMoveFor(HInstruction* user,
+void RegisterAllocator::AddMove(HParallelMove* move,
+ Location source,
+ Location destination,
+ HInstruction* instruction,
+ Primitive::Type type) const {
+ if (type == Primitive::kPrimLong
+ && codegen_->ShouldSplitLongMoves()
+ // The parallel move resolver knows how to deal with long constants.
+ && !source.IsConstant()) {
+ move->AddMove(source.ToLow(), destination.ToLow(), instruction);
+ move->AddMove(source.ToHigh(), destination.ToHigh(), nullptr);
+ } else {
+ move->AddMove(source, destination, instruction);
+ }
+}
+
+void RegisterAllocator::AddInputMoveFor(HInstruction* input,
+ HInstruction* user,
Location source,
Location destination) const {
if (source.Equals(destination)) return;
@@ -1222,7 +1245,7 @@ void RegisterAllocator::AddInputMoveFor(HInstruction* user,
move = previous->AsParallelMove();
}
DCHECK_EQ(move->GetLifetimePosition(), user->GetLifetimePosition());
- move->AddMove(source, destination, nullptr);
+ AddMove(move, source, destination, nullptr, input->GetType());
}
static bool IsInstructionStart(size_t position) {
@@ -1251,8 +1274,16 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position,
at = liveness_.GetInstructionFromPosition((position + 1) / 2);
// Note that parallel moves may have already been inserted, so we explicitly
// ask for the first instruction of the block: `GetInstructionFromPosition` does
- // not contain the moves.
+ // not contain the `HParallelMove` instructions.
at = at->GetBlock()->GetFirstInstruction();
+
+ if (at->GetLifetimePosition() < position) {
+ // We may insert moves for split siblings and phi spills at the beginning of the block.
+ // Since this is a different lifetime position, we need to go to the next instruction.
+ DCHECK(at->IsParallelMove());
+ at = at->GetNext();
+ }
+
if (at->GetLifetimePosition() != position) {
DCHECK_GT(at->GetLifetimePosition(), position);
move = new (allocator_) HParallelMove(allocator_);
@@ -1294,7 +1325,7 @@ void RegisterAllocator::InsertParallelMoveAt(size_t position,
}
}
DCHECK_EQ(move->GetLifetimePosition(), position);
- move->AddMove(source, destination, instruction);
+ AddMove(move, source, destination, instruction, instruction->GetType());
}
void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
@@ -1324,7 +1355,7 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
} else {
move = previous->AsParallelMove();
}
- move->AddMove(source, destination, instruction);
+ AddMove(move, source, destination, instruction, instruction->GetType());
}
void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
@@ -1336,14 +1367,15 @@ void RegisterAllocator::InsertParallelMoveAtEntryOf(HBasicBlock* block,
HInstruction* first = block->GetFirstInstruction();
HParallelMove* move = first->AsParallelMove();
+ size_t position = block->GetLifetimeStart();
// This is a parallel move for connecting blocks. We need to differentiate
// it with moves for connecting siblings in a same block, and input moves.
- if (move == nullptr || move->GetLifetimePosition() != block->GetLifetimeStart()) {
+ if (move == nullptr || move->GetLifetimePosition() != position) {
move = new (allocator_) HParallelMove(allocator_);
- move->SetLifetimePosition(block->GetLifetimeStart());
+ move->SetLifetimePosition(position);
block->InsertInstructionBefore(move, first);
}
- move->AddMove(source, destination, instruction);
+ AddMove(move, source, destination, instruction, instruction->GetType());
}
void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
@@ -1367,7 +1399,7 @@ void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
move->SetLifetimePosition(position);
instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
}
- move->AddMove(source, destination, instruction);
+ AddMove(move, source, destination, instruction, instruction->GetType());
}
void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
@@ -1402,7 +1434,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
if (expected_location.IsUnallocated()) {
locations->SetInAt(use->GetInputIndex(), source);
} else if (!expected_location.IsConstant()) {
- AddInputMoveFor(use->GetUser(), source, expected_location);
+ AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
}
} else {
DCHECK(use->GetUser()->IsInvoke());
@@ -1657,7 +1689,7 @@ void RegisterAllocator::Resolve() {
Location source = input->GetLiveInterval()->GetLocationAt(
predecessor->GetLifetimeEnd() - 1);
Location destination = phi->GetLiveInterval()->ToLocation();
- InsertParallelMoveAtExitOf(predecessor, nullptr, source, destination);
+ InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
}
}
}
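
AddMove() above centralizes the decision to emit one 64-bit move or two 32-bit half moves when the code generator asks for split long moves (constants are left whole for the parallel move resolver). A standalone sketch of that decision, with illustrative types:

// Illustrative only; models RegisterAllocator::AddMove's long-splitting decision.
#include <cstdio>
#include <vector>

struct Move { int src; int dst; const char* half; };

struct MoveList {
  std::vector<Move> moves;

  void Add(int src_low, int src_high, int dst_low, int dst_high,
           bool is_long, bool split_long_moves, bool src_is_constant) {
    if (is_long && split_long_moves && !src_is_constant) {
      // Two independent 32-bit moves; the resolver may schedule them separately.
      moves.push_back({src_low, dst_low, "low"});
      moves.push_back({src_high, dst_high, "high"});
    } else {
      // One move covering the whole value.
      moves.push_back({src_low, dst_low, "full"});
    }
  }
};

int main() {
  MoveList list;
  list.Add(/*src*/0, 1, /*dst*/2, 3, /*is_long=*/true,
           /*split_long_moves=*/true, /*src_is_constant=*/false);
  for (const Move& m : list.moves)
    std::printf("%s: r%d -> r%d\n", m.half, m.src, m.dst);
}
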
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 579f069f5e..fcc61128a6 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -66,13 +66,6 @@ class RegisterAllocator {
bool log_fatal_on_failure);
static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
- static bool Supports(InstructionSet instruction_set) {
- return instruction_set == kArm
- || instruction_set == kArm64
- || instruction_set == kThumb2
- || instruction_set == kX86
- || instruction_set == kX86_64;
- }
size_t GetNumberOfSpillSlots() const {
return int_spill_slots_.Size()
@@ -121,12 +114,21 @@ class RegisterAllocator {
Location source,
Location destination) const;
void InsertMoveAfter(HInstruction* instruction, Location source, Location destination) const;
- void AddInputMoveFor(HInstruction* user, Location source, Location destination) const;
+ void AddInputMoveFor(HInstruction* input,
+ HInstruction* user,
+ Location source,
+ Location destination) const;
void InsertParallelMoveAt(size_t position,
HInstruction* instruction,
Location source,
Location destination) const;
+ void AddMove(HParallelMove* move,
+ Location source,
+ Location destination,
+ HInstruction* instruction,
+ Primitive::Type type) const;
+
// Helper methods.
void AllocateRegistersInternal();
void ProcessInstruction(HInstruction* instruction);
@@ -136,9 +138,11 @@ class RegisterAllocator {
int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
int FindAvailableRegister(size_t* next_use) const;
- // Try splitting an active non-pair interval at the given `position`.
+ // Try splitting an active non-pair or unaligned pair interval at the given `position`.
// Returns whether it was successful at finding such an interval.
- bool TrySplitNonPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use);
+ bool TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
+ size_t first_register_use,
+ size_t* next_use);
ArenaAllocator* const allocator_;
CodeGenerator* const codegen_;
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 3dc75059b2..ba11e90d9c 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -22,6 +22,158 @@
namespace art {
+/**
+ * A debuggable application may require reviving phis, to ensure their
+ * associated DEX register is available to a debugger. This class implements
+ * the logic for statement (c) of the SsaBuilder (see ssa_builder.h). It
+ * also makes sure that phis with incompatible input types are not revived
+ * (statement (b) of the SsaBuilder).
+ *
+ * This phase must be run after detecting dead phis through the
+ * DeadPhiElimination phase, and before deleting the dead phis.
+ */
+class DeadPhiHandling : public ValueObject {
+ public:
+ explicit DeadPhiHandling(HGraph* graph)
+ : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+
+ void Run();
+
+ private:
+ void VisitBasicBlock(HBasicBlock* block);
+ void ProcessWorklist();
+ void AddToWorklist(HPhi* phi);
+ void AddDependentInstructionsToWorklist(HPhi* phi);
+ bool UpdateType(HPhi* phi);
+
+ HGraph* const graph_;
+ GrowableArray<HPhi*> worklist_;
+
+ static constexpr size_t kDefaultWorklistSize = 8;
+
+ DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling);
+};
+
+bool DeadPhiHandling::UpdateType(HPhi* phi) {
+ Primitive::Type existing = phi->GetType();
+ DCHECK(phi->IsLive());
+
+ bool conflict = false;
+ Primitive::Type new_type = existing;
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+ HInstruction* input = phi->InputAt(i);
+ if (input->IsPhi() && input->AsPhi()->IsDead()) {
+ // We are doing a reverse post order visit of the graph, reviving
+ // phis that have environment uses and updating their types. If an
+ // input is a phi, and it is dead (because its input types are
+ // conflicting), this phi must be marked dead as well.
+ conflict = true;
+ break;
+ }
+ Primitive::Type input_type = HPhi::ToPhiType(input->GetType());
+
+ // The only acceptable transitions are:
+ // - From void to typed: first time we update the type of this phi.
+ // - From int to reference (or reference to int): the phi has to change
+ // to reference type. If the integer input cannot be converted to a
+ // reference input, the phi will remain dead.
+ if (new_type == Primitive::kPrimVoid) {
+ new_type = input_type;
+ } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) {
+ HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input);
+ if (equivalent == nullptr) {
+ conflict = true;
+ break;
+ } else {
+ phi->ReplaceInput(equivalent, i);
+ if (equivalent->IsPhi()) {
+ DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot);
+ // We created a new phi, but that phi has the same inputs as the old phi. We
+ // add it to the worklist to ensure its inputs can also be converted to reference.
+ // If not, it will remain dead, and the algorithm will make the current phi dead
+ // as well.
+ equivalent->AsPhi()->SetLive();
+ AddToWorklist(equivalent->AsPhi());
+ }
+ }
+ } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) {
+ new_type = Primitive::kPrimNot;
+ // Start over, we may request reference equivalents for the inputs of the phi.
+ i = -1;
+ } else if (new_type != input_type) {
+ conflict = true;
+ break;
+ }
+ }
+
+ if (conflict) {
+ phi->SetType(Primitive::kPrimVoid);
+ phi->SetDead();
+ return true;
+ } else {
+ DCHECK(phi->IsLive());
+ phi->SetType(new_type);
+ return existing != new_type;
+ }
+}
+
+void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) {
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* phi = it.Current()->AsPhi();
+ if (phi->IsDead() && phi->HasEnvironmentUses()) {
+ phi->SetLive();
+ if (block->IsLoopHeader()) {
+ // Give a type to the loop phi, to guarantee convergence of the algorithm.
+ phi->SetType(phi->InputAt(0)->GetType());
+ AddToWorklist(phi);
+ } else {
+ // Because we are doing a reverse post order visit, all inputs of
+ // this phi have been visited and therefore had their (initial) type set.
+ UpdateType(phi);
+ }
+ }
+ }
+}
+
+void DeadPhiHandling::ProcessWorklist() {
+ while (!worklist_.IsEmpty()) {
+ HPhi* instruction = worklist_.Pop();
+ // Note that the same equivalent phi can be added multiple times in the work list, if
+ // used by multiple phis. The first call to `UpdateType` will know whether the phi is
+ // dead or live.
+ if (instruction->IsLive() && UpdateType(instruction)) {
+ AddDependentInstructionsToWorklist(instruction);
+ }
+ }
+}
+
+void DeadPhiHandling::AddToWorklist(HPhi* instruction) {
+ DCHECK(instruction->IsLive());
+ worklist_.Add(instruction);
+}
+
+void DeadPhiHandling::AddDependentInstructionsToWorklist(HPhi* instruction) {
+ for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+ HPhi* phi = it.Current()->GetUser()->AsPhi();
+ if (phi != nullptr && !phi->IsDead()) {
+ AddToWorklist(phi);
+ }
+ }
+}
+
+void DeadPhiHandling::Run() {
+ for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+ VisitBasicBlock(it.Current());
+ }
+ ProcessWorklist();
+}
+
+static bool IsPhiEquivalentOf(HInstruction* instruction, HPhi* phi) {
+ return instruction != nullptr
+ && instruction->IsPhi()
+ && instruction->AsPhi()->GetRegNumber() == phi->GetRegNumber();
+}
+
void SsaBuilder::BuildSsa() {
// 1) Visit in reverse post order. We need to have all predecessors of a block visited
// (with the exception of loops) in order to create the right environment for that
@@ -47,11 +199,9 @@ void SsaBuilder::BuildSsa() {
// our code generator will complain if the inputs of a phi do not have the same
// type. The marking allows the type propagation to know which phis it needs
// to handle. We mark but do not eliminate: the elimination will be done in
- // step 5).
- {
- SsaDeadPhiElimination dead_phis(GetGraph());
- dead_phis.MarkDeadPhis();
- }
+ // step 9).
+ SsaDeadPhiElimination dead_phis_for_type_propagation(GetGraph());
+ dead_phis_for_type_propagation.MarkDeadPhis();
// 4) Propagate types of phis. At this point, phis are typed void in the general
// case, or float/double/reference when we created an equivalent phi. So we
@@ -59,17 +209,62 @@ void SsaBuilder::BuildSsa() {
PrimitiveTypePropagation type_propagation(GetGraph());
type_propagation.Run();
- // 5) Step 4) changes inputs of phis which may lead to dead phis again. We re-run
- // the algorithm and this time elimimates them.
- // TODO: Make this work with debug info and reference liveness. We currently
- // eagerly remove phis used in environments.
- {
- SsaDeadPhiElimination dead_phis(GetGraph());
- dead_phis.Run();
+ // 5) Mark dead phis again. Step 4) may have introduced new phis.
+ SsaDeadPhiElimination dead_phis(GetGraph());
+ dead_phis.MarkDeadPhis();
+
+ // 6) Now that the graph is correctly typed, we can get rid of redundant phis.
+ // Note that we cannot do this phase before type propagation, otherwise
+ // we could get rid of phi equivalents, whose presence is a requirement for the
+ // type propagation phase. Note that this is to satisfy statement (a) of the
+ // SsaBuilder (see ssa_builder.h).
+ SsaRedundantPhiElimination redundant_phi(GetGraph());
+ redundant_phi.Run();
+
+ // 7) Make sure environments use the right phi "equivalent": a phi marked dead
+ // can have a phi equivalent that is not dead. We must therefore update
+ // all environment uses of the dead phi to use its equivalent. Note that there
+ // can be multiple phis for the same Dex register that are live (for example
+ // when merging constants), in which case it is OK for the environments
+ // to just reference one.
+ for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
+ HPhi* phi = it_phis.Current()->AsPhi();
+ // If the phi is not dead, or has no environment uses, there is nothing to do.
+ if (!phi->IsDead() || !phi->HasEnvironmentUses()) continue;
+ HInstruction* next = phi->GetNext();
+ if (!IsPhiEquivalentOf(next, phi)) continue;
+ if (next->AsPhi()->IsDead()) {
+ // If the phi equivalent is dead, check if there is another one.
+ next = next->GetNext();
+ if (!IsPhiEquivalentOf(next, phi)) continue;
+ // There can be at most two phi equivalents.
+ DCHECK(!IsPhiEquivalentOf(next->GetNext(), phi));
+ if (next->AsPhi()->IsDead()) continue;
+ }
+ // We found a live phi equivalent. Update the environment uses of `phi` with it.
+ phi->ReplaceWith(next);
+ }
}
- // 6) Clear locals.
- // TODO: Move this to a dead code eliminator phase.
+ // 8) Deal with phis to guarantee liveness of phis in case of a debuggable
+ // application. This is for satisfying statement (c) of the SsaBuilder
+ // (see ssa_builder.h).
+ if (GetGraph()->IsDebuggable()) {
+ DeadPhiHandling dead_phi_handler(GetGraph());
+ dead_phi_handler.Run();
+ }
+
+ // 9) Now that the right phis are used for the environments, and we
+ // have potentially revived dead phis in case of a debuggable application,
+ // we can eliminate phis we do not need. Regardless of the debuggable status,
+ // this phase is necessary for statement (b) of the SsaBuilder (see ssa_builder.h),
+ // as well as for the code generation, which does not deal with phis of conflicting
+ // input types.
+ dead_phis.EliminateDeadPhis();
+
+ // 10) Clear locals.
for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
!it.Done();
it.Advance()) {
@@ -257,12 +452,12 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
}
HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
- if (value->IsIntConstant()) {
- DCHECK_EQ(value->AsIntConstant()->GetValue(), 0);
+ if (value->IsIntConstant() && value->AsIntConstant()->GetValue() == 0) {
return value->GetBlock()->GetGraph()->GetNullConstant();
- } else {
- DCHECK(value->IsPhi());
+ } else if (value->IsPhi()) {
return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot);
+ } else {
+ return nullptr;
}
}
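
DeadPhiHandling::UpdateType merges the input types of a revived phi: void adopts the first input type, int and reference merge toward reference (via a reference equivalent), and any other mismatch keeps the phi dead. A simplified standalone sketch of the merge rule, leaving out the equivalent-rewriting step the real code performs:

// Illustrative only; simplified version of the DeadPhiHandling merge rule.
#include <cstdio>

enum class PType { kVoid, kInt, kNot /* reference */, kFloat, kConflict };

PType MergeForRevivedPhi(PType current, PType input) {
  if (current == PType::kVoid) return input;  // first typed input
  if (current == input) return current;       // already compatible
  if ((current == PType::kInt && input == PType::kNot) ||
      (current == PType::kNot && input == PType::kInt)) {
    return PType::kNot;  // in ART this also requires a reference equivalent
  }
  return PType::kConflict;  // phi stays dead
}

int main() {
  PType t = PType::kVoid;
  PType inputs[] = {PType::kInt, PType::kNot, PType::kNot};
  for (PType in : inputs) t = MergeForRevivedPhi(t, in);
  std::printf("merged to reference: %d\n", t == PType::kNot);
  std::printf("float vs int conflicts: %d\n",
              MergeForRevivedPhi(PType::kInt, PType::kFloat) == PType::kConflict);
}
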
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index f50da46040..24dc449513 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -24,6 +24,28 @@ namespace art {
static constexpr int kDefaultNumberOfLoops = 2;
+/**
+ * Transforms a graph into SSA form. The liveness guarantees of
+ * this transformation are listed below. A DEX register
+ * being killed means its value at a given position in the code
+ * will not be available to its environment uses. A merge in the
+ * following text is materialized as a `HPhi`.
+ *
+ * (a) Dex registers that do not require merging (that is, they do not
+ * have different values at a join block) are available to all their
+ * environment uses. Note that it does not imply the instruction will
+ * have a physical location after register allocation. See the
+ * SsaLivenessAnalysis phase.
+ *
+ * (b) Dex registers that require merging, and the merging gives
+ * incompatible types, will be killed for environment uses of that merge.
+ *
+ * (c) When the `debuggable` flag is passed to the compiler, Dex registers
+ * that require merging and have a proper type after the merge, are
+ * available to all their environment uses. If the `debuggable` flag
+ * is not set, values of Dex registers only used by environments
+ * are killed.
+ */
class SsaBuilder : public HGraphVisitor {
public:
explicit SsaBuilder(HGraph* graph)
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index d009390a0f..c0d6f42ca5 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -230,11 +230,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
}
if (current->HasEnvironment()) {
- // All instructions in the environment must be live.
+ // Handle environment uses. See statements (b) and (c) of the
+ // SsaLivenessAnalysis.
HEnvironment* environment = current->GetEnvironment();
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* instruction = environment->GetInstructionAt(i);
- if (instruction != nullptr) {
+ if (ShouldBeLiveForEnvironment(instruction)) {
DCHECK(instruction->HasSsaIndex());
live_in->SetBit(instruction->GetSsaIndex());
instruction->GetLiveInterval()->AddUse(current, i, true);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 9ff2f205d8..b57029d1a7 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -302,7 +302,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
first_range_->start_ = from;
} else {
// Instruction without uses.
- DCHECK(!defined_by_->HasUses());
+ DCHECK(!defined_by_->HasNonEnvironmentUses());
DCHECK(from == defined_by_->GetLifetimePosition());
first_range_ = last_range_ = new (allocator_) LiveRange(from, from + 2, nullptr);
}
@@ -373,13 +373,17 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
if (location.IsUnallocated()) {
if ((location.GetPolicy() == Location::kRequiresRegister)
|| (location.GetPolicy() == Location::kSameAsFirstInput
- && locations->InAt(0).GetPolicy() == Location::kRequiresRegister)) {
+ && (locations->InAt(0).IsRegister()
+ || locations->InAt(0).IsRegisterPair()
+ || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
return position;
} else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
|| (location.GetPolicy() == Location::kSameAsFirstInput
&& locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) {
return position;
}
+ } else if (location.IsRegister() || location.IsRegisterPair()) {
+ return position;
}
}
@@ -794,6 +798,22 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
DISALLOW_COPY_AND_ASSIGN(LiveInterval);
};
+/**
+ * Analysis that computes the liveness of instructions:
+ *
+ * (a) Non-environment uses of an instruction always make
+ * the instruction live.
+ * (b) Environment uses of an instruction whose type is
+ * object (that is, non-primitive) make the instruction live.
+ * This is because we must keep alive objects whose
+ * finalizers delete native objects.
+ * (c) When the graph has the debuggable property, environment uses
+ * of an instruction that has a primitive type make the instruction live.
+ * If the graph does not have the debuggable property, the environment
+ * use has no effect, and may get a 'none' value after register allocation.
+ *
+ * (b) and (c) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment.
+ */
class SsaLivenessAnalysis : public ValueObject {
public:
SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* codegen)
@@ -878,6 +898,12 @@ class SsaLivenessAnalysis : public ValueObject {
// Update the live_out set of the block and returns whether it has changed.
bool UpdateLiveOut(const HBasicBlock& block);
+ static bool ShouldBeLiveForEnvironment(HInstruction* instruction) {
+ if (instruction == nullptr) return false;
+ if (instruction->GetBlock()->GetGraph()->IsDebuggable()) return true;
+ return instruction->GetType() == Primitive::kPrimNot;
+ }
+
const HGraph& graph_;
CodeGenerator* const codegen_;
GrowableArray<HBasicBlock*> linear_order_;
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index a05b38c9eb..00c241b85a 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -332,8 +332,8 @@ TEST(SsaTest, Loop5) {
const char* expected =
"BasicBlock 0, succ: 1\n"
" 0: IntConstant 0 [4, 4]\n"
- " 1: IntConstant 4 [14]\n"
- " 2: IntConstant 5 [14]\n"
+ " 1: IntConstant 4 [13]\n"
+ " 2: IntConstant 5 [13]\n"
" 3: Goto\n"
"BasicBlock 1, pred: 0, succ: 3, 2\n"
" 4: Equal(0, 0) [5]\n"
@@ -343,18 +343,17 @@ TEST(SsaTest, Loop5) {
"BasicBlock 3, pred: 1, succ: 8\n"
" 7: Goto\n"
"BasicBlock 4, pred: 8, 5, succ: 6, 5\n"
- " 8: Phi(14, 8) [8, 12, 9, 9]\n"
- " 9: Equal(8, 8) [10]\n"
- " 10: If(9)\n"
+ " 8: Equal(13, 13) [9]\n"
+ " 9: If(8)\n"
"BasicBlock 5, pred: 4, succ: 4\n"
- " 11: Goto\n"
+ " 10: Goto\n"
"BasicBlock 6, pred: 4, succ: 7\n"
- " 12: Return(8)\n"
+ " 11: Return(13)\n"
"BasicBlock 7, pred: 6\n"
- " 13: Exit\n"
+ " 12: Exit\n"
"BasicBlock 8, pred: 2, 3, succ: 4\n"
- " 14: Phi(1, 2) [8]\n"
- " 15: Goto\n";
+ " 13: Phi(1, 2) [8, 8, 11]\n"
+ " 14: Goto\n";
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 5283d5dcca..63a02862b4 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -20,6 +20,7 @@
#include "base/bit_vector.h"
#include "base/value_object.h"
#include "memory_region.h"
+#include "nodes.h"
#include "stack_map.h"
#include "utils/growable_array.h"
@@ -32,8 +33,9 @@ namespace art {
class StackMapStream : public ValueObject {
public:
explicit StackMapStream(ArenaAllocator* allocator)
- : stack_maps_(allocator, 10),
- dex_register_maps_(allocator, 10 * 4),
+ : allocator_(allocator),
+ stack_maps_(allocator, 10),
+ dex_register_locations_(allocator, 10 * 4),
inline_infos_(allocator, 2),
stack_mask_max_(-1),
number_of_stack_maps_with_inline_info_(0) {}
@@ -52,13 +54,9 @@ class StackMapStream : public ValueObject {
BitVector* sp_mask;
uint32_t num_dex_registers;
uint8_t inlining_depth;
- size_t dex_register_maps_start_index;
+ size_t dex_register_locations_start_index;
size_t inline_infos_start_index;
- };
-
- struct DexRegisterEntry {
- DexRegisterMap::LocationKind kind;
- int32_t value;
+ BitVector* live_dex_registers_mask;
};
struct InlineInfoEntry {
@@ -78,8 +76,14 @@ class StackMapStream : public ValueObject {
entry.sp_mask = sp_mask;
entry.num_dex_registers = num_dex_registers;
entry.inlining_depth = inlining_depth;
- entry.dex_register_maps_start_index = dex_register_maps_.Size();
+ entry.dex_register_locations_start_index = dex_register_locations_.Size();
entry.inline_infos_start_index = inline_infos_.Size();
+ if (num_dex_registers != 0) {
+ entry.live_dex_registers_mask =
+ new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
+ } else {
+ entry.live_dex_registers_mask = nullptr;
+ }
stack_maps_.Add(entry);
if (sp_mask != nullptr) {
@@ -90,13 +94,6 @@ class StackMapStream : public ValueObject {
}
}
- void AddDexRegisterEntry(DexRegisterMap::LocationKind kind, int32_t value) {
- DexRegisterEntry entry;
- entry.kind = kind;
- entry.value = value;
- dex_register_maps_.Add(entry);
- }
-
void AddInlineInfoEntry(uint32_t method_index) {
InlineInfoEntry entry;
entry.method_index = method_index;
@@ -104,47 +101,75 @@ class StackMapStream : public ValueObject {
}
size_t ComputeNeededSize() const {
- return CodeInfo::kFixedSize
- + ComputeStackMapSize()
- + ComputeDexRegisterMapSize()
+ size_t size = CodeInfo::kFixedSize
+ + ComputeStackMapsSize()
+ + ComputeDexRegisterMapsSize()
+ ComputeInlineInfoSize();
+ // On ARM, CodeInfo data must be 4-byte aligned.
+ return RoundUp(size, kWordAlignment);
+ }
+
+ size_t ComputeStackMaskSize() const {
+ return StackMaskEncodingSize(stack_mask_max_);
+ }
+
+ size_t ComputeStackMapsSize() const {
+ return stack_maps_.Size() * StackMap::ComputeStackMapSize(ComputeStackMaskSize());
}
- size_t ComputeStackMapSize() const {
- return stack_maps_.Size() * StackMap::ComputeAlignedStackMapSize(stack_mask_max_);
+ // Compute the size of the Dex register map of `entry`.
+ size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const {
+ size_t size = DexRegisterMap::kFixedSize;
+ // Add the bit mask for the dex register liveness.
+ size += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers);
+ for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
+ dex_register_number < entry.num_dex_registers;
+ ++dex_register_number) {
+ if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
+ DexRegisterLocation dex_register_location = dex_register_locations_.Get(
+ entry.dex_register_locations_start_index + index_in_dex_register_locations);
+ size += DexRegisterMap::EntrySize(dex_register_location);
+ index_in_dex_register_locations++;
+ }
+ }
+ return size;
}
- size_t ComputeDexRegisterMapSize() const {
- // We currently encode all dex register information per stack map.
- return stack_maps_.Size() * DexRegisterMap::kFixedSize
- // For each dex register entry.
- + (dex_register_maps_.Size() * DexRegisterMap::SingleEntrySize());
+ // Compute the size of all the Dex register maps.
+ size_t ComputeDexRegisterMapsSize() const {
+ size_t size = 0;
+ for (size_t i = 0; i < stack_maps_.Size(); ++i) {
+ size += ComputeDexRegisterMapSize(stack_maps_.Get(i));
+ }
+ return size;
}
+ // Compute the size of all the inline information pieces.
size_t ComputeInlineInfoSize() const {
return inline_infos_.Size() * InlineInfo::SingleEntrySize()
// For encoding the depth.
+ (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
}
- size_t ComputeInlineInfoStart() const {
- return ComputeDexRegisterMapStart() + ComputeDexRegisterMapSize();
+ size_t ComputeDexRegisterMapsStart() const {
+ return CodeInfo::kFixedSize + ComputeStackMapsSize();
}
- size_t ComputeDexRegisterMapStart() const {
- return CodeInfo::kFixedSize + ComputeStackMapSize();
+ size_t ComputeInlineInfoStart() const {
+ return ComputeDexRegisterMapsStart() + ComputeDexRegisterMapsSize();
}
void FillIn(MemoryRegion region) {
CodeInfo code_info(region);
+ DCHECK_EQ(region.size(), ComputeNeededSize());
code_info.SetOverallSize(region.size());
- size_t stack_mask_size = StackMaskEncodingSize(stack_mask_max_);
+ size_t stack_mask_size = ComputeStackMaskSize();
uint8_t* memory_start = region.start();
- MemoryRegion dex_register_maps_region = region.Subregion(
- ComputeDexRegisterMapStart(),
- ComputeDexRegisterMapSize());
+ MemoryRegion dex_register_locations_region = region.Subregion(
+ ComputeDexRegisterMapsStart(),
+ ComputeDexRegisterMapsSize());
MemoryRegion inline_infos_region = region.Subregion(
ComputeInlineInfoStart(),
@@ -152,6 +177,7 @@ class StackMapStream : public ValueObject {
code_info.SetNumberOfStackMaps(stack_maps_.Size());
code_info.SetStackMaskSize(stack_mask_size);
+ DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize());
uintptr_t next_dex_register_map_offset = 0;
uintptr_t next_inline_info_offset = 0;
@@ -167,20 +193,34 @@ class StackMapStream : public ValueObject {
}
if (entry.num_dex_registers != 0) {
- // Set the register map.
- MemoryRegion register_region = dex_register_maps_region.Subregion(
- next_dex_register_map_offset,
- DexRegisterMap::kFixedSize
- + entry.num_dex_registers * DexRegisterMap::SingleEntrySize());
+ // Set the Dex register map.
+ MemoryRegion register_region =
+ dex_register_locations_region.Subregion(
+ next_dex_register_map_offset,
+ ComputeDexRegisterMapSize(entry));
next_dex_register_map_offset += register_region.size();
DexRegisterMap dex_register_map(register_region);
stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start);
- for (size_t j = 0; j < entry.num_dex_registers; ++j) {
- DexRegisterEntry register_entry =
- dex_register_maps_.Get(j + entry.dex_register_maps_start_index);
- dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value);
+ // Offset in `dex_register_map` where to store the next register entry.
+ size_t offset = DexRegisterMap::kFixedSize;
+ dex_register_map.SetLiveBitMask(offset,
+ entry.num_dex_registers,
+ *entry.live_dex_registers_mask);
+ offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers);
+ for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
+ dex_register_number < entry.num_dex_registers;
+ ++dex_register_number) {
+ if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
+ DexRegisterLocation dex_register_location = dex_register_locations_.Get(
+ entry.dex_register_locations_start_index + index_in_dex_register_locations);
+ dex_register_map.SetRegisterInfo(offset, dex_register_location);
+ offset += DexRegisterMap::EntrySize(dex_register_location);
+ ++index_in_dex_register_locations;
+ }
}
+ // Ensure we reached the end of the Dex registers region.
+ DCHECK_EQ(offset, register_region.size());
} else {
stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap);
}
@@ -206,13 +246,28 @@ class StackMapStream : public ValueObject {
}
}
+ void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) {
+ if (kind != DexRegisterLocation::Kind::kNone) {
+ // Ensure we only use non-compressed location kind at this stage.
+ DCHECK(DexRegisterLocation::IsShortLocationKind(kind))
+ << DexRegisterLocation::PrettyDescriptor(kind);
+ dex_register_locations_.Add(DexRegisterLocation(kind, value));
+ stack_maps_.Get(stack_maps_.Size() - 1).live_dex_registers_mask->SetBit(dex_register);
+ }
+ }
+
private:
+ ArenaAllocator* allocator_;
GrowableArray<StackMapEntry> stack_maps_;
- GrowableArray<DexRegisterEntry> dex_register_maps_;
+ GrowableArray<DexRegisterLocation> dex_register_locations_;
GrowableArray<InlineInfoEntry> inline_infos_;
int stack_mask_max_;
size_t number_of_stack_maps_with_inline_info_;
+ ART_FRIEND_TEST(StackMapTest, Test1);
+ ART_FRIEND_TEST(StackMapTest, Test2);
+ ART_FRIEND_TEST(StackMapTest, TestNonLiveDexRegisters);
+
DISALLOW_COPY_AND_ASSIGN(StackMapStream);
};
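
The new per-entry ComputeDexRegisterMapSize() makes Dex register maps variable-sized: a live-register bit mask followed by one location entry per live register only. A standalone sketch of that layout computation; the header size and the 1-byte/5-byte entry sizes below are illustrative assumptions, not the exact DexRegisterMap encoding:

// Illustrative only; models the "bit mask + entries for live registers" layout.
#include <cstdio>
#include <vector>

constexpr size_t kFixedHeaderSize = 0;  // stand-in for DexRegisterMap::kFixedSize

size_t LiveBitMaskSize(size_t num_dex_registers) {
  return (num_dex_registers + 7) / 8;  // one bit per Dex register, rounded up to bytes
}

struct Location { bool is_live; bool needs_large_entry; };

size_t ComputeMapSize(const std::vector<Location>& regs) {
  size_t size = kFixedHeaderSize + LiveBitMaskSize(regs.size());
  for (const Location& loc : regs) {
    if (!loc.is_live) continue;             // dead registers take no entry at all
    size += loc.needs_large_entry ? 5 : 1;  // short vs. large location encoding
  }
  return size;
}

int main() {
  // Two registers, both live, one needing the large encoding (e.g. a large constant).
  std::vector<Location> regs = {{true, false}, {true, true}};
  std::printf("map size = %zu bytes\n", ComputeMapSize(regs));
}
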
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 5b025106ac..87ac2e79e9 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -22,7 +22,7 @@
namespace art {
-bool SameBits(MemoryRegion region, const BitVector& bit_vector) {
+static bool SameBits(MemoryRegion region, const BitVector& bit_vector) {
for (size_t i = 0; i < region.size_in_bits(); ++i) {
if (region.LoadBit(i) != bit_vector.IsBitSet(i)) {
return false;
@@ -31,11 +31,6 @@ bool SameBits(MemoryRegion region, const BitVector& bit_vector) {
return true;
}
-size_t ComputeDexRegisterMapSize(size_t number_of_dex_registers) {
- return DexRegisterMap::kFixedSize
- + number_of_dex_registers * DexRegisterMap::SingleEntrySize();
-}
-
TEST(StackMapTest, Test1) {
ArenaPool pool;
ArenaAllocator arena(&pool);
@@ -44,8 +39,8 @@ TEST(StackMapTest, Test1) {
ArenaBitVector sp_mask(&arena, 0, false);
size_t number_of_dex_registers = 2;
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(DexRegisterMap::kInStack, 0);
- stream.AddDexRegisterEntry(DexRegisterMap::kConstant, -2);
+ stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
+ stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -67,14 +62,16 @@ TEST(StackMapTest, Test1) {
ASSERT_TRUE(SameBits(stack_mask, sp_mask));
ASSERT_TRUE(stack_map.HasDexRegisterMap());
- DexRegisterMap dex_registers =
- code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(16u, dex_registers.Size());
- ASSERT_EQ(16u, ComputeDexRegisterMapSize(number_of_dex_registers));
- ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0));
- ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1));
- ASSERT_EQ(0, dex_registers.GetValue(0));
- ASSERT_EQ(-2, dex_registers.GetValue(1));
+ DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+ ASSERT_EQ(7u, dex_registers.Size());
+ DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
+ DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
+ ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind());
+ ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind());
+ ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind());
+ ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind());
+ ASSERT_EQ(0, location0.GetValue());
+ ASSERT_EQ(-2, location1.GetValue());
ASSERT_FALSE(stack_map.HasInlineInfo());
}
@@ -89,8 +86,8 @@ TEST(StackMapTest, Test2) {
sp_mask1.SetBit(4);
size_t number_of_dex_registers = 2;
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
- stream.AddDexRegisterEntry(DexRegisterMap::kInStack, 0);
- stream.AddDexRegisterEntry(DexRegisterMap::kConstant, -2);
+ stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
+ stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
stream.AddInlineInfoEntry(42);
stream.AddInlineInfoEntry(82);
@@ -98,8 +95,8 @@ TEST(StackMapTest, Test2) {
sp_mask2.SetBit(3);
sp_mask1.SetBit(8);
stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(DexRegisterMap::kInRegister, 18);
- stream.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, 3);
+ stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 18);
+ stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kInFpuRegister, 3);
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -111,53 +108,95 @@ TEST(StackMapTest, Test2) {
ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
// First stack map.
- StackMap stack_map = code_info.GetStackMapAt(0);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
- ASSERT_EQ(0u, stack_map.GetDexPc());
- ASSERT_EQ(64u, stack_map.GetNativePcOffset());
- ASSERT_EQ(0x3u, stack_map.GetRegisterMask());
+ {
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
+ ASSERT_EQ(0u, stack_map.GetDexPc());
+ ASSERT_EQ(64u, stack_map.GetNativePcOffset());
+ ASSERT_EQ(0x3u, stack_map.GetRegisterMask());
+
+ MemoryRegion stack_mask = stack_map.GetStackMask();
+ ASSERT_TRUE(SameBits(stack_mask, sp_mask1));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_registers =
+ code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+ ASSERT_EQ(7u, dex_registers.Size());
+ DexRegisterLocation location0 =
+ dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
+ DexRegisterLocation location1 =
+ dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
+ ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind());
+ ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind());
+ ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind());
+ ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind());
+ ASSERT_EQ(0, location0.GetValue());
+ ASSERT_EQ(-2, location1.GetValue());
+
+ ASSERT_TRUE(stack_map.HasInlineInfo());
+ InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map);
+ ASSERT_EQ(2u, inline_info.GetDepth());
+ ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0));
+ ASSERT_EQ(82u, inline_info.GetMethodReferenceIndexAtDepth(1));
+ }
- MemoryRegion stack_mask = stack_map.GetStackMask();
- ASSERT_TRUE(SameBits(stack_mask, sp_mask1));
+ // Second stack map.
+ {
+ StackMap stack_map = code_info.GetStackMapAt(1);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u)));
+ ASSERT_EQ(1u, stack_map.GetDexPc());
+ ASSERT_EQ(128u, stack_map.GetNativePcOffset());
+ ASSERT_EQ(0xFFu, stack_map.GetRegisterMask());
+
+ MemoryRegion stack_mask = stack_map.GetStackMask();
+ ASSERT_TRUE(SameBits(stack_mask, sp_mask2));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_registers =
+ code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+ ASSERT_EQ(3u, dex_registers.Size());
+ DexRegisterLocation location0 =
+ dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
+ DexRegisterLocation location1 =
+ dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
+ ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetKind());
+ ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetKind());
+ ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetInternalKind());
+ ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetInternalKind());
+ ASSERT_EQ(18, location0.GetValue());
+ ASSERT_EQ(3, location1.GetValue());
+
+ ASSERT_FALSE(stack_map.HasInlineInfo());
+ }
+}
- ASSERT_TRUE(stack_map.HasDexRegisterMap());
- DexRegisterMap dex_registers =
- code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(16u, dex_registers.Size());
- ASSERT_EQ(16u, ComputeDexRegisterMapSize(number_of_dex_registers));
- ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0));
- ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1));
- ASSERT_EQ(0, dex_registers.GetValue(0));
- ASSERT_EQ(-2, dex_registers.GetValue(1));
-
- ASSERT_TRUE(stack_map.HasInlineInfo());
- InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map);
- ASSERT_EQ(2u, inline_info.GetDepth());
- ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0));
- ASSERT_EQ(82u, inline_info.GetMethodReferenceIndexAtDepth(1));
+TEST(StackMapTest, TestNonLiveDexRegisters) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ StackMapStream stream(&arena);
- // Second stack map.
- stack_map = code_info.GetStackMapAt(1);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u)));
- ASSERT_EQ(1u, stack_map.GetDexPc());
- ASSERT_EQ(128u, stack_map.GetNativePcOffset());
- ASSERT_EQ(0xFFu, stack_map.GetRegisterMask());
+ ArenaBitVector sp_mask(&arena, 0, false);
+ uint32_t number_of_dex_registers = 2;
+ stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kNone, 0);
+ stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
- stack_mask = stack_map.GetStackMask();
- ASSERT_TRUE(SameBits(stack_mask, sp_mask2));
+ size_t size = stream.ComputeNeededSize();
+ void* memory = arena.Alloc(size, kArenaAllocMisc);
+ MemoryRegion region(memory, size);
+ stream.FillIn(region);
+ CodeInfo code_info(region);
+ StackMap stack_map = code_info.GetStackMapAt(0);
ASSERT_TRUE(stack_map.HasDexRegisterMap());
- dex_registers =
- code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(16u, dex_registers.Size());
- ASSERT_EQ(16u, ComputeDexRegisterMapSize(number_of_dex_registers));
- ASSERT_EQ(DexRegisterMap::kInRegister, dex_registers.GetLocationKind(0));
- ASSERT_EQ(DexRegisterMap::kInFpuRegister, dex_registers.GetLocationKind(1));
- ASSERT_EQ(18, dex_registers.GetValue(0));
- ASSERT_EQ(3, dex_registers.GetValue(1));
-
+ DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2);
+ ASSERT_EQ(DexRegisterLocation::Kind::kNone,
+ dex_registers.GetLocationKind(0, number_of_dex_registers));
+ ASSERT_EQ(DexRegisterLocation::Kind::kConstant,
+ dex_registers.GetLocationKind(1, number_of_dex_registers));
+ ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers));
ASSERT_FALSE(stack_map.HasInlineInfo());
}
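The map sizes asserted in the tests above (7u for a short in-stack location plus a large constant, 3u for two short register locations) are consistent with a layout of a live-register bit mask followed by one encoded location per live register; the per-location byte counts in the sketch below are an inference from those asserts, not a statement of the exact encoding.

  // Rough sanity check, assuming a 1-byte short location, a 5-byte
  // (1-byte kind + 4-byte value) large location, and a live-register
  // bit mask rounded up to whole bytes.
  size_t number_of_dex_registers = 2;
  size_t live_mask_bytes = (number_of_dex_registers + 7) / 8;  // 1 byte for 2 registers.
  size_t test1_map_size =
      live_mask_bytes + 1 /* kInStack(0), short */ + 5 /* kConstant(-2), large */;        // 7, as asserted.
  size_t test2_second_map_size =
      live_mask_bytes + 1 /* kInRegister(18), short */ + 1 /* kInFpuRegister(3), short */;  // 3, as asserted.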