Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/boolean_simplifier.cc | 14
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 124
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc | 32
-rw-r--r--  compiler/optimizing/builder.cc | 29
-rw-r--r--  compiler/optimizing/builder.h | 6
-rw-r--r--  compiler/optimizing/code_generator.cc | 29
-rw-r--r--  compiler/optimizing/code_generator.h | 5
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 132
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 4
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 174
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 10
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 495
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 17
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 349
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 28
-rw-r--r--  compiler/optimizing/codegen_test.cc | 18
-rw-r--r--  compiler/optimizing/common_arm64.h | 4
-rw-r--r--  compiler/optimizing/constant_folding_test.cc | 5
-rw-r--r--  compiler/optimizing/dead_code_elimination_test.cc | 5
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 10
-rw-r--r--  compiler/optimizing/graph_visualizer.h | 4
-rw-r--r--  compiler/optimizing/intrinsics.cc | 6
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 4
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 4
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 356
-rw-r--r--  compiler/optimizing/intrinsics_x86.h | 3
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 441
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.h | 3
-rw-r--r--  compiler/optimizing/linearize_test.cc | 5
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 25
-rw-r--r--  compiler/optimizing/liveness_test.cc | 5
-rw-r--r--  compiler/optimizing/nodes.cc | 4
-rw-r--r--  compiler/optimizing/nodes.h | 43
-rw-r--r--  compiler/optimizing/nodes_test.cc | 2
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 127
-rw-r--r--  compiler/optimizing/optimizing_cfi_test_expected.inc | 141
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 46
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc | 82
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h | 7
-rw-r--r--  compiler/optimizing/parallel_move_test.cc | 27
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 4
-rw-r--r--  compiler/optimizing/register_allocator.cc | 54
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 65
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 2
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 32
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h | 25
-rw-r--r--  compiler/optimizing/stack_map_stream.h | 173
-rw-r--r--  compiler/optimizing/stack_map_test.cc | 333
48 files changed, 2941 insertions(+), 572 deletions(-)
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index ab77505b6f..be432c5a20 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -59,7 +59,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) {
return new (allocator) HGreaterThan(lhs, rhs);
} else if (cond->IsGreaterThan()) {
return new (allocator) HLessThanOrEqual(lhs, rhs);
- } else if (cond->IsGreaterThanOrEqual()) {
+ } else {
+ DCHECK(cond->IsGreaterThanOrEqual());
return new (allocator) HLessThan(lhs, rhs);
}
} else if (cond->IsIntConstant()) {
@@ -70,10 +71,11 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) {
DCHECK(int_const->IsOne());
return graph->GetIntConstant(0);
}
+ } else {
+ // General case: 'cond' is some other boolean-typed instruction.
+ // Negate with 'cond == 0'.
+ return new (allocator) HEqual(cond, graph->GetIntConstant(0));
}
-
- // TODO: b/19992954
- return nullptr;
}
void HBooleanSimplifier::Run() {
@@ -105,10 +107,6 @@ void HBooleanSimplifier::Run() {
HInstruction* replacement;
if (NegatesCondition(true_value, false_value)) {
replacement = GetOppositeCondition(if_condition);
- if (replacement == nullptr) {
- // Something we could not handle.
- continue;
- }
if (replacement->GetBlock() == nullptr) {
block->InsertInstructionBefore(replacement, if_instruction);
}
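
Note: with this change GetOppositeCondition is total over boolean inputs, which is why the nullptr fallback and its caller-side check can be dropped. A minimal model of the negation it now guarantees (hypothetical helper, not part of the patch): a boolean-typed value b in {0, 1} is negated as (b == 0), which is exactly what the new general case emits as HEqual(cond, GetIntConstant(0)).

    #include <cassert>
    // Hypothetical sketch of the negation rule for the general case.
    static int NegateBoolean(int b) {
      assert(b == 0 || b == 1);  // the simplifier only sees 0/1 values here
      return b == 0;             // models HEqual(cond, 0)
    }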
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 1d167949f4..6511120794 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -239,7 +239,6 @@ class ValueBound : public ValueObject {
*underflow = true;
return Min();
}
- return ValueBound(instruction_, new_constant);
}
private:
@@ -443,9 +442,31 @@ class MonotonicValueRange : public ValueRange {
class BCEVisitor : public HGraphVisitor {
public:
+ // The minimum number of bounds checks that need to be eliminated before the
+ // deoptimization technique is considered worth triggering.
+ static constexpr size_t kThresholdForAddingDeoptimize = 2;
+
+ // A very large constant index is treated as an anomaly. Beyond this threshold
+ // we don't bother applying the deoptimization technique, since an AIOOBE is
+ // likely to be thrown anyway.
+ static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024;
+
explicit BCEVisitor(HGraph* graph)
: HGraphVisitor(graph),
- maps_(graph->GetBlocks().Size()) {}
+ maps_(graph->GetBlocks().Size()),
+ need_to_revisit_block_(false) {}
+
+ void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ first_constant_index_bounds_check_map_.clear();
+ HGraphVisitor::VisitBasicBlock(block);
+ if (need_to_revisit_block_) {
+ AddComparesWithDeoptimization(block);
+ need_to_revisit_block_ = false;
+ first_constant_index_bounds_check_map_.clear();
+ GetValueRangeMap(block)->clear();
+ HGraphVisitor::VisitBasicBlock(block);
+ }
+ }
private:
// Return the map of proven value ranges at the beginning of a basic block.
@@ -701,9 +722,26 @@ class BCEVisitor : public HGraphVisitor {
}
}
+ if (first_constant_index_bounds_check_map_.find(array_length->GetId()) ==
+ first_constant_index_bounds_check_map_.end()) {
+ // Remember the first bounds check against array_length of a constant index.
+ // That bounds check instruction has an associated HEnvironment where we
+ // may add an HDeoptimize to eliminate bounds checks of constant indices
+ // against array_length.
+ first_constant_index_bounds_check_map_.Put(array_length->GetId(), bounds_check);
+ } else {
+ // We've seen it at least twice. It's beneficial to introduce a compare with
+ // deoptimization fallback to eliminate the bounds checks.
+ need_to_revisit_block_ = true;
+ }
+
// Once we have an array access like 'array[5] = 1', we record array.length >= 6.
// We currently don't do it for non-constant index since a valid array[i] can't prove
// a valid array[i-1] yet due to the lower bound side.
+ if (constant == INT_MAX) {
+ // INT_MAX as an index will definitely throw AIOOBE.
+ return;
+ }
ValueBound lower = ValueBound(nullptr, constant + 1);
ValueBound upper = ValueBound::Max();
ValueRange* range = new (GetGraph()->GetArena())
@@ -938,8 +976,90 @@ class BCEVisitor : public HGraphVisitor {
}
}
+ void VisitDeoptimize(HDeoptimize* deoptimize) {
+ // Right now it's only HLessThanOrEqual.
+ DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual());
+ HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
+ HInstruction* instruction = less_than_or_equal->InputAt(0);
+ if (instruction->IsArrayLength()) {
+ HInstruction* constant = less_than_or_equal->InputAt(1);
+ DCHECK(constant->IsIntConstant());
+ DCHECK(constant->AsIntConstant()->GetValue() <= kMaxConstantForAddingDeoptimize);
+ ValueBound lower = ValueBound(nullptr, constant->AsIntConstant()->GetValue() + 1);
+ ValueRange* range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), lower, ValueBound::Max());
+ GetValueRangeMap(deoptimize->GetBlock())->Overwrite(instruction->GetId(), range);
+ }
+ }
+
+ void AddCompareWithDeoptimization(HInstruction* array_length,
+ HIntConstant* const_instr,
+ HBasicBlock* block) {
+ DCHECK(array_length->IsArrayLength());
+ ValueRange* range = LookupValueRange(array_length, block);
+ ValueBound lower_bound = range->GetLower();
+ DCHECK(lower_bound.IsConstant());
+ DCHECK(const_instr->GetValue() <= kMaxConstantForAddingDeoptimize);
+ DCHECK_EQ(lower_bound.GetConstant(), const_instr->GetValue() + 1);
+
+ // If array_length is less than or equal to the constant, deoptimize.
+ HBoundsCheck* bounds_check = first_constant_index_bounds_check_map_.Get(
+ array_length->GetId())->AsBoundsCheck();
+ HCondition* cond = new (GetGraph()->GetArena()) HLessThanOrEqual(array_length, const_instr);
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena())
+ HDeoptimize(cond, bounds_check->GetDexPc());
+ block->InsertInstructionBefore(cond, bounds_check);
+ block->InsertInstructionBefore(deoptimize, bounds_check);
+ deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
+ }
+
+ void AddComparesWithDeoptimization(HBasicBlock* block) {
+ for (ArenaSafeMap<int, HBoundsCheck*>::iterator it =
+ first_constant_index_bounds_check_map_.begin();
+ it != first_constant_index_bounds_check_map_.end();
+ ++it) {
+ HBoundsCheck* bounds_check = it->second;
+ HArrayLength* array_length = bounds_check->InputAt(1)->AsArrayLength();
+ HIntConstant* lower_bound_const_instr = nullptr;
+ int32_t lower_bound_const = INT_MIN;
+ size_t counter = 0;
+ // Count the constant indexing for which bounds checks haven't
+ // been removed yet.
+ for (HUseIterator<HInstruction*> it2(array_length->GetUses());
+ !it2.Done();
+ it2.Advance()) {
+ HInstruction* user = it2.Current()->GetUser();
+ if (user->GetBlock() == block &&
+ user->IsBoundsCheck() &&
+ user->AsBoundsCheck()->InputAt(0)->IsIntConstant()) {
+ DCHECK_EQ(array_length, user->AsBoundsCheck()->InputAt(1));
+ HIntConstant* const_instr = user->AsBoundsCheck()->InputAt(0)->AsIntConstant();
+ if (const_instr->GetValue() > lower_bound_const) {
+ lower_bound_const = const_instr->GetValue();
+ lower_bound_const_instr = const_instr;
+ }
+ counter++;
+ }
+ }
+ if (counter >= kThresholdForAddingDeoptimize &&
+ lower_bound_const_instr->GetValue() <= kMaxConstantForAddingDeoptimize) {
+ AddCompareWithDeoptimization(array_length, lower_bound_const_instr, block);
+ }
+ }
+ }
+
std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_;
+ // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
+ // a block that checks a constant index against that HArrayLength.
+ SafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
+
+ // Set when the current block has at least one HArrayLength instruction that is
+ // bounds-checked more than once with a constant index, making it beneficial to
+ // add a compare instruction with deoptimization fallback and then eliminate
+ // those bounds checks.
+ bool need_to_revisit_block_;
+
DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
};
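
Taken together, the new pieces work like this: VisitBoundsCheck remembers the first constant-index bounds check per array length and flips need_to_revisit_block_ when a second un-eliminated one shows up; VisitBasicBlock then inserts one compare plus HDeoptimize and re-runs the block so range analysis can prove the individual checks away. A hedged sketch of the transformation (Java-level view written as C++ comments; indices chosen so that neither check is removable on the first pass):

    // Before: two constant-index checks against the same a.length that the
    // first pass cannot eliminate (index 3 is visited first, then 5):
    //   if (3 >= a.length) throw AIOOBE;  a[3] = 1;  // proves a.length >= 4
    //   if (5 >= a.length) throw AIOOBE;  a[5] = 1;  // 5 >= 4, not provable
    //
    // After AddCompareWithDeoptimization and the revisit: one guard, keyed
    // on the largest surviving constant (5), precedes the first check:
    //   if (a.length <= 5) Deoptimize();  // HLessThanOrEqual + HDeoptimize
    //   a[3] = 1;  a[5] = 1;              // both bounds checks eliminated
    // VisitDeoptimize is what feeds 'a.length >= 6' back into the ranges.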
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index b3653fe903..75cf1cf063 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -284,9 +284,9 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) {
ASSERT_FALSE(IsRemoved(bounds_check));
}
-// array[5] = 1; // Can't eliminate.
-// array[4] = 1; // Can eliminate.
// array[6] = 1; // Can't eliminate.
+// array[5] = 1; // Can eliminate.
+// array[4] = 1; // Can eliminate.
TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
@@ -311,35 +311,35 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0);
HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
- HBoundsCheck* bounds_check5 = new (&allocator)
- HBoundsCheck(constant_5, array_length, 0);
+ HBoundsCheck* bounds_check6 = new (&allocator)
+ HBoundsCheck(constant_6, array_length, 0);
HInstruction* array_set = new (&allocator) HArraySet(
- null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0);
+ null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0);
block->AddInstruction(null_check);
block->AddInstruction(array_length);
- block->AddInstruction(bounds_check5);
+ block->AddInstruction(bounds_check6);
block->AddInstruction(array_set);
null_check = new (&allocator) HNullCheck(parameter, 0);
array_length = new (&allocator) HArrayLength(null_check);
- HBoundsCheck* bounds_check4 = new (&allocator)
- HBoundsCheck(constant_4, array_length, 0);
+ HBoundsCheck* bounds_check5 = new (&allocator)
+ HBoundsCheck(constant_5, array_length, 0);
array_set = new (&allocator) HArraySet(
- null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0);
+ null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0);
block->AddInstruction(null_check);
block->AddInstruction(array_length);
- block->AddInstruction(bounds_check4);
+ block->AddInstruction(bounds_check5);
block->AddInstruction(array_set);
null_check = new (&allocator) HNullCheck(parameter, 0);
array_length = new (&allocator) HArrayLength(null_check);
- HBoundsCheck* bounds_check6 = new (&allocator)
- HBoundsCheck(constant_6, array_length, 0);
+ HBoundsCheck* bounds_check4 = new (&allocator)
+ HBoundsCheck(constant_4, array_length, 0);
array_set = new (&allocator) HArraySet(
- null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0);
+ null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0);
block->AddInstruction(null_check);
block->AddInstruction(array_length);
- block->AddInstruction(bounds_check6);
+ block->AddInstruction(bounds_check4);
block->AddInstruction(array_set);
block->AddInstruction(new (&allocator) HGoto());
@@ -353,9 +353,9 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
- ASSERT_FALSE(IsRemoved(bounds_check5));
- ASSERT_TRUE(IsRemoved(bounds_check4));
ASSERT_FALSE(IsRemoved(bounds_check6));
+ ASSERT_TRUE(IsRemoved(bounds_check5));
+ ASSERT_TRUE(IsRemoved(bounds_check4));
}
// for (int i=initial; i<array.length; i+=increment) { array[i] = 10; }
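
The reordering above lines the expected results up with how ranges propagate inside the block: only the first access's check survives, and since a single surviving check is below kThresholdForAddingDeoptimize (2), no deoptimize guard is emitted in this test. In sketch form:

    // a[6] = 1;  // check kept: nothing constrains a.length yet, but the
    //            // successful store then proves a.length >= 7
    // a[5] = 1;  // 5 < 7  -> bounds check removed
    // a[4] = 1;  // 4 < 7  -> bounds check removed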
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2cdd5af9f3..a912d4ccc4 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -23,6 +23,7 @@
#include "dex_instruction.h"
#include "dex_instruction-inl.h"
#include "driver/compiler_driver-inl.h"
+#include "driver/compiler_options.h"
#include "mirror/art_field.h"
#include "mirror/art_field-inl.h"
#include "mirror/class_loader.h"
@@ -230,8 +231,7 @@ void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
}
}
-bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions,
- size_t number_of_blocks ATTRIBUTE_UNUSED,
+bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item,
size_t number_of_branches) {
const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter();
@@ -239,19 +239,20 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions,
return false;
}
- if (compiler_options.IsHugeMethod(number_of_dex_instructions)) {
+ if (compiler_options.IsHugeMethod(code_item.insns_size_in_code_units_)) {
VLOG(compiler) << "Skip compilation of huge method "
<< PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
- << ": " << number_of_dex_instructions << " dex instructions";
+ << ": " << code_item.insns_size_in_code_units_ << " code units";
MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
return true;
}
// If it's large and contains no branches, it's likely to be machine generated initialization.
- if (compiler_options.IsLargeMethod(number_of_dex_instructions) && (number_of_branches == 0)) {
+ if (compiler_options.IsLargeMethod(code_item.insns_size_in_code_units_)
+ && (number_of_branches == 0)) {
VLOG(compiler) << "Skip compilation of large method with no branch "
<< PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
- << ": " << number_of_dex_instructions << " dex instructions";
+ << ": " << code_item.insns_size_in_code_units_ << " code units";
MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
return true;
}
@@ -278,18 +279,14 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
// Compute the number of dex instructions, blocks, and branches. We will
// check these values against limits given to the compiler.
- size_t number_of_dex_instructions = 0;
- size_t number_of_blocks = 0;
size_t number_of_branches = 0;
// To avoid splitting blocks, we compute ahead of time the instructions that
// start a new block, and create these blocks.
- ComputeBranchTargets(
- code_ptr, code_end, &number_of_dex_instructions, &number_of_blocks, &number_of_branches);
+ ComputeBranchTargets(code_ptr, code_end, &number_of_branches);
// Note that the compiler driver is null when unit testing.
- if ((compiler_driver_ != nullptr)
- && SkipCompilation(number_of_dex_instructions, number_of_blocks, number_of_branches)) {
+ if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
return false;
}
@@ -355,8 +352,6 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) {
void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
const uint16_t* code_end,
- size_t* number_of_dex_instructions,
- size_t* number_of_blocks,
size_t* number_of_branches) {
branch_targets_.SetSize(code_end - code_ptr);
@@ -369,7 +364,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
// the locations these instructions branch to.
uint32_t dex_pc = 0;
while (code_ptr < code_end) {
- (*number_of_dex_instructions)++;
const Instruction& instruction = *Instruction::At(code_ptr);
if (instruction.IsBranch()) {
(*number_of_branches)++;
@@ -378,14 +372,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
if (FindBlockStartingAt(target) == nullptr) {
block = new (arena_) HBasicBlock(graph_, target);
branch_targets_.Put(target, block);
- (*number_of_blocks)++;
}
dex_pc += instruction.SizeInCodeUnits();
code_ptr += instruction.SizeInCodeUnits();
if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
block = new (arena_) HBasicBlock(graph_, dex_pc);
branch_targets_.Put(dex_pc, block);
- (*number_of_blocks)++;
}
} else if (instruction.IsSwitch()) {
SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
@@ -403,14 +395,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
if (FindBlockStartingAt(target) == nullptr) {
block = new (arena_) HBasicBlock(graph_, target);
branch_targets_.Put(target, block);
- (*number_of_blocks)++;
}
// The next case gets its own block.
if (i < num_entries) {
block = new (arena_) HBasicBlock(graph_, target);
branch_targets_.Put(table.GetDexPcForIndex(i), block);
- (*number_of_blocks)++;
}
}
@@ -420,7 +410,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
block = new (arena_) HBasicBlock(graph_, dex_pc);
branch_targets_.Put(dex_pc, block);
- (*number_of_blocks)++;
}
} else {
code_ptr += instruction.SizeInCodeUnits();
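
Since the code-unit count is already stored in the CodeItem header, the builder no longer has to count instructions and blocks just to feed the skip heuristics. A simplified model of what SkipCompilation now checks (IsHugeMethod and IsLargeMethod are the real helpers; the preceding compiler-filter test is elided):

    // Hedged sketch; not the verbatim implementation.
    bool ShouldSkip(const CompilerOptions& opts,
                    uint32_t insns_size_in_code_units,
                    size_t number_of_branches) {
      if (opts.IsHugeMethod(insns_size_in_code_units)) {
        return true;  // too big to compile at this filter level
      }
      // Large and branch-free is likely machine-generated initialization.
      return opts.IsLargeMethod(insns_size_in_code_units) &&
             number_of_branches == 0;
    }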
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 6a0738a7b9..dc6d97eb0c 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -90,8 +90,6 @@ class HGraphBuilder : public ValueObject {
// branches.
void ComputeBranchTargets(const uint16_t* start,
const uint16_t* end,
- size_t* number_of_dex_instructions,
- size_t* number_of_block,
size_t* number_of_branches);
void MaybeUpdateCurrentBlock(size_t index);
HBasicBlock* FindBlockStartingAt(int32_t index) const;
@@ -217,9 +215,7 @@ class HGraphBuilder : public ValueObject {
HInstruction* value, int32_t case_value_int,
int32_t target_offset, uint32_t dex_pc);
- bool SkipCompilation(size_t number_of_dex_instructions,
- size_t number_of_blocks,
- size_t number_of_branches);
+ bool SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches);
void MaybeRecordStat(MethodCompilationStat compilation_stat);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bd6e943bf0..8736374306 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -82,6 +82,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
HGraphVisitor* instruction_visitor = GetInstructionVisitor();
DCHECK_EQ(current_block_index_, 0u);
GenerateFrameEntry();
+ DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_));
for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) {
HBasicBlock* block = block_order_->Get(current_block_index_);
// Don't generate code for an empty block. Its predecessors will branch to its successor
@@ -132,7 +133,6 @@ size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
}
LOG(FATAL) << "Could not find a register in baseline register allocator";
UNREACHABLE();
- return -1;
}
size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
@@ -145,7 +145,6 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l
}
LOG(FATAL) << "Could not find a register in baseline register allocator";
UNREACHABLE();
- return -1;
}
void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
@@ -378,10 +377,14 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
case kMips:
return nullptr;
case kX86: {
- return new x86::CodeGeneratorX86(graph, compiler_options);
+ return new x86::CodeGeneratorX86(graph,
+ *isa_features.AsX86InstructionSetFeatures(),
+ compiler_options);
}
case kX86_64: {
- return new x86_64::CodeGeneratorX86_64(graph, compiler_options);
+ return new x86_64::CodeGeneratorX86_64(graph,
+ *isa_features.AsX86_64InstructionSetFeatures(),
+ compiler_options);
}
default:
return nullptr;
@@ -413,7 +416,16 @@ void CodeGenerator::BuildNativeGCMap(
}
}
-void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const {
+void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const {
+ for (size_t i = 0; i < pc_infos_.Size(); i++) {
+ struct PcInfo pc_info = pc_infos_.Get(i);
+ uint32_t pc2dex_offset = pc_info.native_pc;
+ int32_t pc2dex_dalvik_offset = pc_info.dex_pc;
+ src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
+ }
+}
+
+void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
uint32_t pc2dex_data_size = 0u;
uint32_t pc2dex_entries = pc_infos_.Size();
uint32_t pc2dex_offset = 0u;
@@ -423,19 +435,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap*
uint32_t dex2pc_offset = 0u;
int32_t dex2pc_dalvik_offset = 0;
- if (src_map != nullptr) {
- src_map->reserve(pc2dex_entries);
- }
-
for (size_t i = 0; i < pc2dex_entries; i++) {
struct PcInfo pc_info = pc_infos_.Get(i);
pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset);
pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset);
pc2dex_offset = pc_info.native_pc;
pc2dex_dalvik_offset = pc_info.dex_pc;
- if (src_map != nullptr) {
- src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
- }
}
// Walk over the blocks and find which ones correspond to catch block entries.
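
The DCHECK added to CompileInternal states the contract behind all of the per-backend cfi() calls in this change: every stack adjustment in the prologue must be mirrored in the assembler's CFI state, so that the tracked CFA offset equals the frame size once the prologue is done. Roughly, using the x86 flavor from this patch (ARM and ARM64 are analogous):

    __ pushl(reg);                           // SP -= 4
    __ cfi().AdjustCFAOffset(kX86WordSize);  // tracked CFA offset += 4
    __ cfi().RelOffset(DWARFReg(reg), 0);    // record where reg was saved
    __ subl(ESP, Immediate(adjust));         // allocate the rest of the frame
    __ cfi().AdjustCFAOffset(adjust);
    // Post-condition checked in CompileInternal:
    //   GetAssembler()->cfi().GetCurrentCFAOffset() == frame_size_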
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 07ca6b1ccf..b888aca264 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -205,7 +205,8 @@ class CodeGenerator {
slow_paths_.Add(slow_path);
}
- void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
+ void BuildSourceMap(DefaultSrcMap* src_map) const;
+ void BuildMappingTable(std::vector<uint8_t>* vector) const;
void BuildVMapTable(std::vector<uint8_t>* vector) const;
void BuildNativeGCMap(
std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
@@ -425,6 +426,8 @@ class CodeGenerator {
StackMapStream stack_map_stream_;
+ friend class OptimizingCFITest;
+
DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1f95041a92..a799a519c0 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -287,6 +287,26 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM {
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM);
};
+class DeoptimizationSlowPathARM : public SlowPathCodeARM {
+ public:
+ explicit DeoptimizationSlowPathARM(HInstruction* instruction)
+ : instruction_(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ DCHECK(instruction_->IsDeoptimize());
+ HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+ uint32_t dex_pc = deoptimize->GetDexPc();
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ }
+
+ private:
+ HInstruction* const instruction_;
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
+};
+
#undef __
@@ -493,6 +513,14 @@ void CodeGeneratorARM::ComputeSpillMask() {
}
}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::ArmCore(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(SRegister reg) {
+ return dwarf::Reg::ArmFp(static_cast<int>(reg));
+}
+
void CodeGeneratorARM::GenerateFrameEntry() {
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
@@ -511,12 +539,19 @@ void CodeGeneratorARM::GenerateFrameEntry() {
// PC is in the list of callee-save to mimic Quick, but we need to push
// LR at entry instead.
- __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR);
+ uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR;
+ __ PushList(push_mask);
+ __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask));
+ __ cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, push_mask, kArmWordSize);
if (fpu_spill_mask_ != 0) {
SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
__ vpushs(start_register, POPCOUNT(fpu_spill_mask_));
+ __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
+ __ cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fpu_spill_mask_, kArmWordSize);
}
- __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize()));
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ AddConstant(SP, -adjust);
+ __ cfi().AdjustCFAOffset(adjust);
__ StoreToOffset(kStoreWord, R0, SP, 0);
}
@@ -525,10 +560,14 @@ void CodeGeneratorARM::GenerateFrameExit() {
__ bx(LR);
return;
}
- __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize());
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ AddConstant(SP, adjust);
+ __ cfi().AdjustCFAOffset(-adjust);
if (fpu_spill_mask_ != 0) {
SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
__ vpops(start_register, POPCOUNT(fpu_spill_mask_));
+ __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_));
+ __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_);
}
__ PopList(core_spill_mask_);
}
@@ -542,7 +581,6 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimLong:
case Primitive::kPrimDouble:
return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
- break;
case Primitive::kPrimInt:
case Primitive::kPrimNot:
@@ -555,10 +593,11 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimShort:
case Primitive::kPrimVoid:
LOG(FATAL) << "Unexpected type " << load->GetType();
+ UNREACHABLE();
}
LOG(FATAL) << "Unreachable";
- return Location();
+ UNREACHABLE();
}
Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -663,7 +702,6 @@ Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type ty
return Location();
}
UNREACHABLE();
- return Location();
}
void CodeGeneratorARM::Move32(Location destination, Location source) {
@@ -887,24 +925,17 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) {
UNUSED(exit);
}
-void LocationsBuilderARM::VisitIf(HIf* if_instr) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
- locations->SetInAt(0, Location::RequiresRegister());
- }
-}
-
-void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
- HInstruction* cond = if_instr->InputAt(0);
+void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target) {
+ HInstruction* cond = instruction->InputAt(0);
if (cond->IsIntConstant()) {
// Constant condition, statically compared against 1.
int32_t cond_value = cond->AsIntConstant()->GetValue();
if (cond_value == 1) {
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ if (always_true_target != nullptr) {
+ __ b(always_true_target);
}
return;
} else {
@@ -913,10 +944,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
} else {
if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
// Condition has been materialized, compare the output to 0
- DCHECK(if_instr->GetLocations()->InAt(0).IsRegister());
- __ cmp(if_instr->GetLocations()->InAt(0).AsRegister<Register>(),
+ DCHECK(instruction->GetLocations()->InAt(0).IsRegister());
+ __ cmp(instruction->GetLocations()->InAt(0).AsRegister<Register>(),
ShifterOperand(0));
- __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE);
+ __ b(true_target, NE);
} else {
// Condition has not been materialized, use its inputs as the
// comparison and its condition as the branch condition.
@@ -938,16 +969,55 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
__ cmp(left, ShifterOperand(temp));
}
}
- __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()),
- ARMCondition(cond->AsCondition()->GetCondition()));
+ __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition()));
}
}
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
+ if (false_target != nullptr) {
+ __ b(false_target);
+ }
+}
+
+void LocationsBuilderARM::VisitIf(HIf* if_instr) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
+ HInstruction* cond = if_instr->InputAt(0);
+ if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
+ Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+ Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ Label* always_true_target = true_target;
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfTrueSuccessor())) {
+ always_true_target = nullptr;
+ }
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfFalseSuccessor())) {
+ false_target = nullptr;
+ }
+ GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ HInstruction* cond = deoptimize->InputAt(0);
+ DCHECK(cond->IsCondition());
+ if (cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::RequiresRegister());
}
}
+void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena())
+ DeoptimizationSlowPathARM(deoptimize);
+ codegen_->AddSlowPath(slow_path);
+ Label* slow_path_entry = slow_path->GetEntryLabel();
+ GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+}
void LocationsBuilderARM::VisitCondition(HCondition* comp) {
LocationSummary* locations =
@@ -1139,7 +1209,10 @@ void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM::VisitReturn(HReturn* ret) {
@@ -1150,7 +1223,10 @@ void LocationsBuilderARM::VisitReturn(HReturn* ret) {
void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
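
The RememberState/RestoreState bracket around each epilogue keeps the unwind info correct for methods with more than one return: GenerateFrameExit legitimately winds the CFA back down, but any code emitted after the return is still inside the frame. The shared pattern used by the Visit{Return,ReturnVoid} bodies above:

    __ cfi().RememberState();        // snapshot: CFA offset == frame size
    codegen_->GenerateFrameExit();   // the epilogue winds the CFA back down
    __ cfi().RestoreState();         // code emitted after this return point
    __ cfi().DefCFAOffset(codegen_->GetFrameSize());  // is still in the frame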
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index bcdea7a639..06f425ea21 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -188,6 +188,10 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target);
ArmAssembler* const assembler_;
CodeGeneratorARM* const codegen_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 32ada3837e..5fe8adc86a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -352,6 +352,26 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};
+class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
+ : instruction_(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ DCHECK(instruction_->IsDeoptimize());
+ HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+ uint32_t dex_pc = deoptimize->GetDexPc();
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ }
+
+ private:
+ HInstruction* const instruction_;
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
+};
+
#undef __
Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -445,18 +465,65 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
// ... : reserved frame space.
// sp[0] : current method.
__ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
- __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
- __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ GetAssembler()->cfi().AdjustCFAOffset(frame_size);
+ SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
}
}
void CodeGeneratorARM64::GenerateFrameExit() {
if (!HasEmptyFrame()) {
int frame_size = GetFrameSize();
- __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
- __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
__ Drop(frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
+ }
+}
+
+static inline dwarf::Reg DWARFReg(CPURegister reg) {
+ if (reg.IsFPRegister()) {
+ return dwarf::Reg::Arm64Fp(reg.code());
+ } else {
+ DCHECK_LT(reg.code(), 31u); // X0 - X30.
+ return dwarf::Reg::Arm64Core(reg.code());
+ }
+}
+
+void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) {
+ int size = registers.RegisterSizeInBytes();
+ while (registers.Count() >= 2) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
+ GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size);
+ offset += 2 * size;
}
+ if (!registers.IsEmpty()) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ __ Str(dst0, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
+ }
+ DCHECK(registers.IsEmpty());
+}
+
+void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) {
+ int size = registers.RegisterSizeInBytes();
+ while (registers.Count() >= 2) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().Restore(DWARFReg(dst0));
+ GetAssembler()->cfi().Restore(DWARFReg(dst1));
+ offset += 2 * size;
+ }
+ if (!registers.IsEmpty()) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ __ Ldr(dst0, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().Restore(DWARFReg(dst0));
+ }
+ DCHECK(registers.IsEmpty());
}
void CodeGeneratorARM64::Bind(HBasicBlock* block) {
@@ -1611,25 +1678,18 @@ void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
}
}
-void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
- locations->SetInAt(0, Location::RequiresRegister());
- }
-}
-
-void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
- HInstruction* cond = if_instr->InputAt(0);
+void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
+ vixl::Label* true_target,
+ vixl::Label* false_target,
+ vixl::Label* always_true_target) {
+ HInstruction* cond = instruction->InputAt(0);
HCondition* condition = cond->AsCondition();
- vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
- vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
if (cond->IsIntConstant()) {
int32_t cond_value = cond->AsIntConstant()->GetValue();
if (cond_value == 1) {
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
- __ B(true_target);
+ if (always_true_target != nullptr) {
+ __ B(always_true_target);
}
return;
} else {
@@ -1637,31 +1697,87 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
}
} else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
// The condition instruction has been materialized, compare the output to 0.
- Location cond_val = if_instr->GetLocations()->InAt(0);
+ Location cond_val = instruction->GetLocations()->InAt(0);
DCHECK(cond_val.IsRegister());
- __ Cbnz(InputRegisterAt(if_instr, 0), true_target);
+ __ Cbnz(InputRegisterAt(instruction, 0), true_target);
} else {
// The condition instruction has not been materialized, use its inputs as
// the comparison and its condition as the branch condition.
Register lhs = InputRegisterAt(condition, 0);
Operand rhs = InputOperandAt(condition, 1);
Condition arm64_cond = ARM64Condition(condition->GetCondition());
- if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
- if (arm64_cond == eq) {
- __ Cbz(lhs, true_target);
- } else {
- __ Cbnz(lhs, true_target);
+ if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+ switch (arm64_cond) {
+ case eq:
+ __ Cbz(lhs, true_target);
+ break;
+ case ne:
+ __ Cbnz(lhs, true_target);
+ break;
+ case lt:
+ // Test the sign bit and branch accordingly.
+ __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ break;
+ case ge:
+ // Test the sign bit and branch accordingly.
+ __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ break;
+ default:
+ // Without the `static_cast` the compiler throws an error for
+ // `-Werror=sign-promo`.
+ LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
}
} else {
__ Cmp(lhs, rhs);
__ B(arm64_cond, true_target);
}
}
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
+ if (false_target != nullptr) {
__ B(false_target);
}
}
+void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+ HInstruction* cond = if_instr->InputAt(0);
+ if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
+ vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+ vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ vixl::Label* always_true_target = true_target;
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfTrueSuccessor())) {
+ always_true_target = nullptr;
+ }
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfFalseSuccessor())) {
+ false_target = nullptr;
+ }
+ GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ HInstruction* cond = deoptimize->InputAt(0);
+ DCHECK(cond->IsCondition());
+ if (cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
+ DeoptimizationSlowPathARM64(deoptimize);
+ codegen_->AddSlowPath(slow_path);
+ vixl::Label* slow_path_entry = slow_path->GetEntryLabel();
+ GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+}
+
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -2349,8 +2465,11 @@ void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) {
UNUSED(instruction);
+ GetAssembler()->cfi().RememberState();
codegen_->GenerateFrameExit();
__ Ret();
+ GetAssembler()->cfi().RestoreState();
+ GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
@@ -2359,8 +2478,11 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) {
UNUSED(instruction);
+ GetAssembler()->cfi().RememberState();
codegen_->GenerateFrameExit();
__ Ret();
+ GetAssembler()->cfi().RestoreState();
+ GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM64::VisitShl(HShl* shl) {
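
Beyond the HDeoptimize plumbing, the arm64 branch lowering now maps signed comparisons against an immediate zero to single test-bit instructions; gt and le are excluded because they depend on more than the sign bit. A sketch of the dispatch (vixl mnemonics as used in the diff):

    //   x == 0  ->  Cbz  x, target
    //   x != 0  ->  Cbnz x, target
    //   x <  0  ->  Tbnz x, #msb, target   // branch if the sign bit is set
    //   x >= 0  ->  Tbz  x, #msb, target   // branch if the sign bit is clear
    //   x >  0, x <= 0  ->  Cmp x, #0; B(cond, target)  // need N, Z and V
    // where #msb is kXRegSize - 1 (63) for X registers and
    // kWRegSize - 1 (31) for W registers.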
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 2c624d2926..9430e31037 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -23,8 +23,8 @@
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/arm64/assembler_arm64.h"
-#include "a64/disasm-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
#include "arch/arm64/quick_method_frame_info_arm64.h"
namespace art {
@@ -165,6 +165,10 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor {
void HandleShift(HBinaryOperation* instr);
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateTestAndBranch(HInstruction* instruction,
+ vixl::Label* true_target,
+ vixl::Label* false_target,
+ vixl::Label* always_true_target);
Arm64Assembler* const assembler_;
CodeGeneratorARM64* const codegen_;
@@ -223,6 +227,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
void GenerateFrameEntry() OVERRIDE;
void GenerateFrameExit() OVERRIDE;
+ void SpillRegisters(vixl::CPURegList registers, int offset);
+ void UnspillRegisters(vixl::CPURegList registers, int offset);
vixl::CPURegList GetFramePreservedCoreRegisters() const {
return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 007e25ab4a..a6fb07fa98 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -52,7 +52,7 @@ class NullCheckSlowPathX86 : public SlowPathCodeX86 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
__ Bind(GetEntryLabel());
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer)));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
private:
@@ -67,7 +67,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
__ Bind(GetEntryLabel());
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowDivZero)));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
private:
@@ -116,7 +116,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 {
length_location_,
Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds)));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
private:
@@ -137,7 +137,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 {
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend)));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
@@ -295,6 +295,27 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 {
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
};
+class DeoptimizationSlowPathX86 : public SlowPathCodeX86 {
+ public:
+ explicit DeoptimizationSlowPathX86(HInstruction* instruction)
+ : instruction_(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pDeoptimize)));
+ // No need to restore live registers.
+ DCHECK(instruction_->IsDeoptimize());
+ HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+ uint32_t dex_pc = deoptimize->GetDexPc();
+ codegen->RecordPcInfo(instruction_, dex_pc, this);
+ }
+
+ private:
+ HInstruction* const instruction_;
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
+};
+
#undef __
#define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
@@ -340,7 +361,9 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32
return GetFloatingPointSpillSlotSize();
}
-CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
+ const X86InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfXmmRegisters,
@@ -353,7 +376,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compile
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {
+ move_resolver_(graph->GetArena(), this),
+ isa_features_(isa_features) {
// Use a fake return address register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -436,7 +460,12 @@ InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGene
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::X86Core(static_cast<int>(reg));
+}
+
void CodeGeneratorX86::GenerateFrameEntry() {
+ __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
__ Bind(&frame_entry_label_);
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
@@ -455,10 +484,14 @@ void CodeGeneratorX86::GenerateFrameEntry() {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ pushl(reg);
+ __ cfi().AdjustCFAOffset(kX86WordSize);
+ __ cfi().RelOffset(DWARFReg(reg), 0);
}
}
- __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ subl(ESP, Immediate(adjust));
+ __ cfi().AdjustCFAOffset(adjust);
__ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
}
@@ -467,12 +500,16 @@ void CodeGeneratorX86::GenerateFrameExit() {
return;
}
- __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ addl(ESP, Immediate(adjust));
+ __ cfi().AdjustCFAOffset(-adjust);
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ popl(reg);
+ __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
+ __ cfi().Restore(DWARFReg(reg));
}
}
}
@@ -491,7 +528,6 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimLong:
case Primitive::kPrimDouble:
return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
- break;
case Primitive::kPrimInt:
case Primitive::kPrimNot:
@@ -504,10 +540,11 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimShort:
case Primitive::kPrimVoid:
LOG(FATAL) << "Unexpected type " << load->GetType();
+ UNREACHABLE();
}
LOG(FATAL) << "Unreachable";
- return Location();
+ UNREACHABLE();
}
Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -785,24 +822,17 @@ void InstructionCodeGeneratorX86::VisitExit(HExit* exit) {
UNUSED(exit);
}
-void LocationsBuilderX86::VisitIf(HIf* if_instr) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
- locations->SetInAt(0, Location::Any());
- }
-}
-
-void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
- HInstruction* cond = if_instr->InputAt(0);
+void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target) {
+ HInstruction* cond = instruction->InputAt(0);
if (cond->IsIntConstant()) {
// Constant condition, statically compared against 1.
int32_t cond_value = cond->AsIntConstant()->GetValue();
if (cond_value == 1) {
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ if (always_true_target != nullptr) {
+ __ jmp(always_true_target);
}
return;
} else {
@@ -815,20 +845,19 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
// evaluated just before the if, we don't need to evaluate it
// again.
bool eflags_set = cond->IsCondition()
- && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr);
+ && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction);
if (materialized) {
if (!eflags_set) {
// Materialized condition, compare against 0.
- Location lhs = if_instr->GetLocations()->InAt(0);
+ Location lhs = instruction->GetLocations()->InAt(0);
if (lhs.IsRegister()) {
__ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
} else {
__ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
}
- __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(kNotEqual, true_target);
} else {
- __ j(X86Condition(cond->AsCondition()->GetCondition()),
- codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
}
} else {
Location lhs = cond->GetLocations()->InAt(0);
@@ -847,14 +876,54 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
} else {
__ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
}
- __ j(X86Condition(cond->AsCondition()->GetCondition()),
- codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
}
}
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
+ if (false_target != nullptr) {
+ __ jmp(false_target);
+ }
+}
+
+void LocationsBuilderX86::VisitIf(HIf* if_instr) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
+ HInstruction* cond = if_instr->InputAt(0);
+ if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::Any());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
+ Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+ Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ Label* always_true_target = true_target;
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfTrueSuccessor())) {
+ always_true_target = nullptr;
+ }
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfFalseSuccessor())) {
+ false_target = nullptr;
}
+ GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ HInstruction* cond = deoptimize->InputAt(0);
+ DCHECK(cond->IsCondition());
+ if (cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::Any());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
+ SlowPathCodeX86* slow_path = new (GetGraph()->GetArena())
+ DeoptimizationSlowPathX86(deoptimize);
+ codegen_->AddSlowPath(slow_path);
+ Label* slow_path_entry = slow_path->GetEntryLabel();
+ GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
}
void LocationsBuilderX86::VisitLocal(HLocal* local) {
@@ -1047,8 +1116,11 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderX86::VisitReturn(HReturn* ret) {
@@ -1106,12 +1178,15 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
}
}
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderX86 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -2637,16 +2712,16 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
switch (op->GetResultType()) {
case Primitive::kPrimInt: {
- locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL.
+ locations->SetInAt(0, Location::Any());
+ // The shift count needs to be in CL or a constant.
locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
}
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL.
- locations->SetInAt(1, Location::RegisterLocation(ECX));
+ // The shift count needs to be in CL or a constant.
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2665,38 +2740,87 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
switch (op->GetResultType()) {
case Primitive::kPrimInt: {
- Register first_reg = first.AsRegister<Register>();
- if (second.IsRegister()) {
- Register second_reg = second.AsRegister<Register>();
- DCHECK_EQ(ECX, second_reg);
- if (op->IsShl()) {
- __ shll(first_reg, second_reg);
- } else if (op->IsShr()) {
- __ sarl(first_reg, second_reg);
+ if (first.IsRegister()) {
+ Register first_reg = first.AsRegister<Register>();
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ __ shll(first_reg, second_reg);
+ } else if (op->IsShr()) {
+ __ sarl(first_reg, second_reg);
+ } else {
+ __ shrl(first_reg, second_reg);
+ }
} else {
- __ shrl(first_reg, second_reg);
+ int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+ if (shift == 0) {
+ return;
+ }
+ Immediate imm(shift);
+ if (op->IsShl()) {
+ __ shll(first_reg, imm);
+ } else if (op->IsShr()) {
+ __ sarl(first_reg, imm);
+ } else {
+ __ shrl(first_reg, imm);
+ }
}
} else {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
- if (op->IsShl()) {
- __ shll(first_reg, imm);
- } else if (op->IsShr()) {
- __ sarl(first_reg, imm);
+ DCHECK(first.IsStackSlot()) << first;
+ Address addr(ESP, first.GetStackIndex());
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ __ shll(addr, second_reg);
+ } else if (op->IsShr()) {
+ __ sarl(addr, second_reg);
+ } else {
+ __ shrl(addr, second_reg);
+ }
} else {
- __ shrl(first_reg, imm);
+ int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+ if (shift == 0) {
+ return;
+ }
+ Immediate imm(shift);
+ if (op->IsShl()) {
+ __ shll(addr, imm);
+ } else if (op->IsShr()) {
+ __ sarl(addr, imm);
+ } else {
+ __ shrl(addr, imm);
+ }
}
}
+
break;
}
case Primitive::kPrimLong: {
- Register second_reg = second.AsRegister<Register>();
- DCHECK_EQ(ECX, second_reg);
- if (op->IsShl()) {
- GenerateShlLong(first, second_reg);
- } else if (op->IsShr()) {
- GenerateShrLong(first, second_reg);
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ GenerateShlLong(first, second_reg);
+ } else if (op->IsShr()) {
+ GenerateShrLong(first, second_reg);
+ } else {
+ GenerateUShrLong(first, second_reg);
+ }
} else {
- GenerateUShrLong(first, second_reg);
+ // Shift by a constant.
+ int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue;
+ // Nothing to do if the shift is 0, as the input is already the output.
+ if (shift != 0) {
+ if (op->IsShl()) {
+ GenerateShlLong(first, shift);
+ } else if (op->IsShr()) {
+ GenerateShrLong(first, shift);
+ } else {
+ GenerateUShrLong(first, shift);
+ }
+ }
}
break;
}
@@ -2705,6 +2829,26 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
}
}
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Shift by 32 is easy. High gets low, and low gets 0.
+ codegen_->EmitParallelMoves(
+ loc.ToLow(), loc.ToHigh(),
+ Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow());
+ } else if (shift > 32) {
+    // Low part becomes 0. High part is low part << (shift - 32).
+ __ movl(high, low);
+ __ shll(high, Immediate(shift - 32));
+ __ xorl(low, low);
+ } else {
+ // Between 1 and 31.
+ __ shld(high, low, Immediate(shift));
+ __ shll(low, Immediate(shift));
+ }
+}
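// Self-contained sketch of the constant left-shift decomposition above
// (illustration only; shld shifts bits of 'low' into 'high'):
//
//   #include <cstdint>
//   uint64_t ShlLongByConst(uint32_t low, uint32_t high, int shift) {
//     if (shift == 32) {             // high <- low, low <- 0
//       high = low; low = 0;
//     } else if (shift > 32) {       // high <- low << (shift - 32), low <- 0
//       high = low << (shift - 32); low = 0;
//     } else if (shift != 0) {       // 1..31: shld high, low + shll low
//       high = (high << shift) | (low >> (32 - shift));
//       low <<= shift;
//     }
//     return (uint64_t{high} << 32) | low;
//   }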
+
void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
Label done;
__ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
@@ -2716,6 +2860,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register
__ Bind(&done);
}
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Need to copy the sign.
+ DCHECK_NE(low, high);
+ __ movl(low, high);
+ __ sarl(high, Immediate(31));
+ } else if (shift > 32) {
+ DCHECK_NE(low, high);
+ // High part becomes sign. Low part is shifted by shift - 32.
+ __ movl(low, high);
+ __ sarl(high, Immediate(31));
+ __ shrl(low, Immediate(shift - 32));
+ } else {
+ // Between 1 and 31.
+ __ shrd(low, high, Immediate(shift));
+ __ sarl(high, Immediate(shift));
+ }
+}
+
void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
Label done;
__ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -2727,6 +2892,26 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register
__ Bind(&done);
}
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Shift by 32 is easy. Low gets high, and high gets 0.
+ codegen_->EmitParallelMoves(
+ loc.ToHigh(), loc.ToLow(),
+ Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh());
+ } else if (shift > 32) {
+ // Low part is high >> (shift - 32). High part becomes 0.
+ __ movl(low, high);
+ __ shrl(low, Immediate(shift - 32));
+ __ xorl(high, high);
+ } else {
+ // Between 1 and 31.
+ __ shrd(low, high, Immediate(shift));
+ __ shrl(high, Immediate(shift));
+ }
+}
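// Standalone self-check of the unsigned right-shift path against a native
// 64-bit shift (illustration only):
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     const uint64_t v = 0x89ABCDEF01234567ULL;
//     for (int s = 1; s < 64; ++s) {
//       uint32_t lo = static_cast<uint32_t>(v);
//       uint32_t hi = static_cast<uint32_t>(v >> 32);
//       uint32_t rlo, rhi;
//       if (s == 32)      { rlo = hi;                           rhi = 0; }
//       else if (s > 32)  { rlo = hi >> (s - 32);               rhi = 0; }
//       else              { rlo = (lo >> s) | (hi << (32 - s)); rhi = hi >> s; }
//       assert(((uint64_t{rhi} << 32) | rlo) == (v >> s));
//     }
//     return 0;
//   }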
+
void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
Label done;
__ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -3301,7 +3486,7 @@ void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruct
Location obj = locations->InAt(0);
if (obj.IsRegister()) {
- __ cmpl(obj.AsRegister<Register>(), Immediate(0));
+ __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
} else if (obj.IsStackSlot()) {
__ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
} else {
@@ -3487,7 +3672,13 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
// Ensure the value is in a byte register.
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
} else {
- locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+ bool is_fp_type = (value_type == Primitive::kPrimFloat)
+ || (value_type == Primitive::kPrimDouble);
+ if (is_fp_type) {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+ }
}
// Temporary registers for the write barrier.
if (needs_write_barrier) {
@@ -3766,23 +3957,43 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
}
void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, src + stack_offset));
- __ movl(Address(ESP, dst + stack_offset), temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp = possible_scratch.GetRegister();
+ if (temp == kNoRegister) {
+ // Use the stack.
+ __ pushl(Address(ESP, src));
+ __ popl(Address(ESP, dst));
+ } else {
+ Register temp_reg = static_cast<Register>(temp);
+ __ movl(temp_reg, Address(ESP, src));
+ __ movl(Address(ESP, dst), temp_reg);
+ }
}
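// The no-scratch fallback works because x86 push/pop accept memory operands,
// so a 32-bit slot-to-slot copy needs no register at all (illustration):
//
//   pushl (ESP + src)   // read src, then ESP -= 4
//   popl  (ESP + dst)   // popl addresses with the already-incremented ESP,
//                       // so 'dst' needs no +4 adjustment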
void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, src + stack_offset));
- __ movl(Address(ESP, dst + stack_offset), temp_reg);
- __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
- __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp = possible_scratch.GetRegister();
+ if (temp == kNoRegister) {
+ // Use the stack instead.
+ // Push src low word.
+ __ pushl(Address(ESP, src));
+ // Push src high word. Stack offset = 4.
+ __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */));
+
+ // Pop into dst high word. Stack offset = 8.
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */));
+ // Finally dst low word. Stack offset = 4.
+ __ popl(Address(ESP, dst));
+ } else {
+ Register temp_reg = static_cast<Register>(temp);
+ __ movl(temp_reg, Address(ESP, src));
+ __ movl(Address(ESP, dst), temp_reg);
+ __ movl(temp_reg, Address(ESP, src + kX86WordSize));
+ __ movl(Address(ESP, dst + kX86WordSize), temp_reg);
+ }
}
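// Worked example of the ESP bookkeeping in the no-scratch 64-bit path above
// (illustration; src/dst are ESP-relative offsets on entry):
//
//   pushl (ESP + src)              // ESP -= 4; src low word pushed
//   pushl (ESP + src + 4 + 4)      // +4 because ESP moved, +4 for high word
//   popl  (ESP + dst + 4 + 4)      // ESP += 4 happens first, so dst is +4,
//                                  // plus +4 to land on the high word
//   popl  (ESP + dst)              // ESP back at entry; plain dst again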
void ParallelMoveResolverX86::EmitMove(size_t index) {
@@ -3847,10 +4058,18 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ xorps(dest, dest);
} else {
ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp = static_cast<Register>(ensure_scratch.GetRegister());
- __ movl(temp, Immediate(value));
- __ movd(dest, temp);
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = ensure_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ // Avoid spilling/restoring a scratch register by using the stack.
+ __ pushl(Immediate(value));
+ __ movss(dest, Address(ESP, 0));
+ __ addl(ESP, Immediate(4));
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Immediate(value));
+ __ movd(dest, temp);
+ }
}
} else {
DCHECK(destination.IsStackSlot()) << destination;
@@ -3899,42 +4118,96 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
}
}
-void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
- Register suggested_scratch = reg == EAX ? EBX : EAX;
- ScratchRegisterScope ensure_scratch(
- this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) {
+ // Prefer to avoid xchg as it isn't speedy on smaller processors.
+ ScratchRegisterScope possible_scratch(
+ this, reg1, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister || temp_reg == reg2) {
+ __ pushl(reg1);
+ __ movl(reg1, reg2);
+ __ popl(reg2);
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, reg1);
+ __ movl(reg1, reg2);
+ __ movl(reg2, temp);
+ }
+}
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
- __ movl(Address(ESP, mem + stack_offset), reg);
- __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
+void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
+ ScratchRegisterScope possible_scratch(
+ this, reg, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ __ pushl(Address(ESP, mem));
+ __ movl(Address(ESP, mem + kX86WordSize), reg);
+ __ popl(reg);
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Address(ESP, mem));
+ __ movl(Address(ESP, mem), reg);
+ __ movl(reg, temp);
+ }
}
void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, mem + stack_offset));
- __ movss(Address(ESP, mem + stack_offset), reg);
- __ movd(reg, temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ __ pushl(Address(ESP, mem));
+ __ movss(Address(ESP, mem + kX86WordSize), reg);
+ __ movss(reg, Address(ESP, 0));
+ __ addl(ESP, Immediate(kX86WordSize));
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Address(ESP, mem));
+ __ movss(Address(ESP, mem), reg);
+ __ movd(reg, temp);
+ }
}
void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
- ScratchRegisterScope ensure_scratch1(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-
- Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
- ScratchRegisterScope ensure_scratch2(
- this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
-
- int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
- stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
- __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
- __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
- __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
- __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+ ScratchRegisterScope possible_scratch1(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg1 = possible_scratch1.GetRegister();
+ if (temp_reg1 == kNoRegister) {
+ // No free registers. Use the stack.
+ __ pushl(Address(ESP, mem1));
+ __ pushl(Address(ESP, mem2 + kX86WordSize));
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, mem1 + kX86WordSize));
+ __ popl(Address(ESP, mem2));
+ } else {
+ // Got the first one. Try for a second.
+ ScratchRegisterScope possible_scratch2(
+ this, temp_reg1, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg2 = possible_scratch2.GetRegister();
+ if (temp_reg2 == kNoRegister) {
+ Register temp = static_cast<Register>(temp_reg1);
+ // Bummer. Only have one free register to use.
+ // Save mem1 on the stack.
+ __ pushl(Address(ESP, mem1));
+
+ // Copy mem2 into mem1.
+ __ movl(temp, Address(ESP, mem2 + kX86WordSize));
+ __ movl(Address(ESP, mem1 + kX86WordSize), temp);
+
+ // Now pop mem1 into mem2.
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, mem2));
+ } else {
+ // Great. We have 2 registers to play with.
+ Register temp1 = static_cast<Register>(temp_reg1);
+ Register temp2 = static_cast<Register>(temp_reg2);
+ DCHECK_NE(temp1, temp2);
+ __ movl(temp1, Address(ESP, mem1));
+ __ movl(temp2, Address(ESP, mem2));
+ __ movl(Address(ESP, mem2), temp1);
+ __ movl(Address(ESP, mem1), temp2);
+ }
+ }
}
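// The three tiers above trade scratch registers for stack traffic
// (illustration): two free registers cost four movs; one register costs a
// push, two movs, and a pop; none costs two pushes and two pops, again
// relying on popl to an ESP-relative slot using the post-increment ESP.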
void ParallelMoveResolverX86::EmitSwap(size_t index) {
@@ -3943,7 +4216,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) {
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+ Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index a5489d2844..8c56e35329 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -106,6 +106,7 @@ class ParallelMoveResolverX86 : public ParallelMoveResolver {
X86Assembler* GetAssembler() const;
private:
+  void Exchange(Register reg1, Register reg2);
void Exchange(Register reg, int mem);
void Exchange(int mem1, int mem2);
void Exchange32(XmmRegister reg, int mem);
@@ -171,6 +172,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateShlLong(const Location& loc, Register shifter);
void GenerateShrLong(const Location& loc, Register shifter);
void GenerateUShrLong(const Location& loc, Register shifter);
+ void GenerateShlLong(const Location& loc, int shift);
+ void GenerateShrLong(const Location& loc, int shift);
+ void GenerateUShrLong(const Location& loc, int shift);
void GenerateMemoryBarrier(MemBarrierKind kind);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -179,6 +183,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -188,7 +196,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
class CodeGeneratorX86 : public CodeGenerator {
public:
- CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options);
+ CodeGeneratorX86(HGraph* graph,
+ const X86InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options);
virtual ~CodeGeneratorX86() {}
void GenerateFrameEntry() OVERRIDE;
@@ -274,6 +284,10 @@ class CodeGeneratorX86 : public CodeGenerator {
Label* GetFrameEntryLabel() { return &frame_entry_label_; }
+ const X86InstructionSetFeatures& GetInstructionSetFeatures() const {
+ return isa_features_;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -282,6 +296,7 @@ class CodeGeneratorX86 : public CodeGenerator {
InstructionCodeGeneratorX86 instruction_visitor_;
ParallelMoveResolverX86 move_resolver_;
X86Assembler assembler_;
+ const X86InstructionSetFeatures& isa_features_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
};
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2bb0349932..01b24ea33f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -315,6 +315,27 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};
+class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 {
+ public:
+ explicit DeoptimizationSlowPathX86_64(HInstruction* instruction)
+ : instruction_(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ __ gs()->call(
+ Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pDeoptimize), true));
+ DCHECK(instruction_->IsDeoptimize());
+ HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+ uint32_t dex_pc = deoptimize->GetDexPc();
+ codegen->RecordPcInfo(instruction_, dex_pc, this);
+ }
+
+ private:
+ HInstruction* const instruction_;
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
+};
+
#undef __
#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
@@ -391,7 +412,9 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin
static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
-CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
+ const X86_64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfFloatRegisters,
@@ -405,7 +428,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& c
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {
+ move_resolver_(graph->GetArena(), this),
+ isa_features_(isa_features),
+ constant_area_start_(0) {
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -458,7 +483,15 @@ void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
}
}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::X86_64Core(static_cast<int>(reg));
+}
+static dwarf::Reg DWARFReg(FloatRegister reg) {
+ return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
+}
+
void CodeGeneratorX86_64::GenerateFrameEntry() {
+ __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
__ Bind(&frame_entry_label_);
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -478,17 +511,22 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ pushq(CpuRegister(reg));
+ __ cfi().AdjustCFAOffset(kX86_64WordSize);
+ __ cfi().RelOffset(DWARFReg(reg), 0);
}
}
- __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+ int adjust = GetFrameSize() - GetCoreSpillSize();
+ __ subq(CpuRegister(RSP), Immediate(adjust));
+ __ cfi().AdjustCFAOffset(adjust);
uint32_t xmm_spill_location = GetFpuSpillStart();
size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
- __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)),
- XmmRegister(kFpuCalleeSaves[i]));
+ int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+ __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
+ __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
}
}
@@ -503,17 +541,22 @@ void CodeGeneratorX86_64::GenerateFrameExit() {
size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
- __ movsd(XmmRegister(kFpuCalleeSaves[i]),
- Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)));
+ int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+ __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
+ __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
}
}
- __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+ int adjust = GetFrameSize() - GetCoreSpillSize();
+ __ addq(CpuRegister(RSP), Immediate(adjust));
+ __ cfi().AdjustCFAOffset(-adjust);
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ popq(CpuRegister(reg));
+ __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
+ __ cfi().Restore(DWARFReg(reg));
}
}
}
@@ -532,7 +575,6 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimLong:
case Primitive::kPrimDouble:
return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
- break;
case Primitive::kPrimInt:
case Primitive::kPrimNot:
@@ -545,10 +587,11 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimShort:
case Primitive::kPrimVoid:
LOG(FATAL) << "Unexpected type " << load->GetType();
+ UNREACHABLE();
}
LOG(FATAL) << "Unreachable";
- return Location();
+ UNREACHABLE();
}
void CodeGeneratorX86_64::Move(Location destination, Location source) {
@@ -607,7 +650,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
source.AsFpuRegister<XmmRegister>());
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- int64_t value = constant->AsLongConstant()->GetValue();
+ int64_t value;
if (constant->IsDoubleConstant()) {
value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
} else {
@@ -735,24 +778,17 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) {
UNUSED(exit);
}
-void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
- locations->SetInAt(0, Location::Any());
- }
-}
-
-void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
- HInstruction* cond = if_instr->InputAt(0);
+void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target) {
+ HInstruction* cond = instruction->InputAt(0);
if (cond->IsIntConstant()) {
// Constant condition, statically compared against 1.
int32_t cond_value = cond->AsIntConstant()->GetValue();
if (cond_value == 1) {
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ if (always_true_target != nullptr) {
+ __ jmp(always_true_target);
}
return;
} else {
@@ -765,21 +801,20 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
// evaluated just before the if, we don't need to evaluate it
// again.
bool eflags_set = cond->IsCondition()
- && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr);
+ && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction);
if (materialized) {
if (!eflags_set) {
// Materialized condition, compare against 0.
- Location lhs = if_instr->GetLocations()->InAt(0);
+ Location lhs = instruction->GetLocations()->InAt(0);
if (lhs.IsRegister()) {
__ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
} else {
__ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()),
Immediate(0));
}
- __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(kNotEqual, true_target);
} else {
- __ j(X86_64Condition(cond->AsCondition()->GetCondition()),
- codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target);
}
} else {
Location lhs = cond->GetLocations()->InAt(0);
@@ -797,16 +832,56 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
__ cmpl(lhs.AsRegister<CpuRegister>(),
Address(CpuRegister(RSP), rhs.GetStackIndex()));
}
- __ j(X86_64Condition(cond->AsCondition()->GetCondition()),
- codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target);
}
}
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
+ if (false_target != nullptr) {
+ __ jmp(false_target);
+ }
+}
+
+void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
+ HInstruction* cond = if_instr->InputAt(0);
+ if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::Any());
}
}
+void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
+ Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+ Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ Label* always_true_target = true_target;
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfTrueSuccessor())) {
+ always_true_target = nullptr;
+ }
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfFalseSuccessor())) {
+ false_target = nullptr;
+ }
+ GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ HInstruction* cond = deoptimize->InputAt(0);
+ DCHECK(cond->IsCondition());
+ if (cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::Any());
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
+ SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena())
+ DeoptimizationSlowPathX86_64(deoptimize);
+ codegen_->AddSlowPath(slow_path);
+ Label* slow_path_entry = slow_path->GetEntryLabel();
+ GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+}
+
void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
local->SetLocations(nullptr);
}
@@ -1068,8 +1143,11 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
@@ -1120,8 +1198,11 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
}
}
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -1181,7 +1262,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -1242,7 +1323,7 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
}
void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
- IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -1896,7 +1977,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
case Primitive::kPrimDouble:
case Primitive::kPrimFloat: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -1960,12 +2041,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
}
case Primitive::kPrimFloat: {
- __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -1993,7 +2092,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2031,12 +2130,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
}
case Primitive::kPrimFloat: {
- __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2069,7 +2186,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2114,13 +2231,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
case Primitive::kPrimFloat: {
DCHECK(first.Equals(locations->Out()));
- __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
DCHECK(first.Equals(locations->Out()));
- __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2493,7 +2628,7 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2518,12 +2653,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
}
case Primitive::kPrimFloat: {
- __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -3668,15 +3821,27 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
ScratchRegisterScope ensure_scratch(
- this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+ this, TMP, codegen_->GetNumberOfCoreRegisters());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
- __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
- __ movq(CpuRegister(ensure_scratch.GetRegister()),
- Address(CpuRegister(RSP), mem2 + stack_offset));
- __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
- __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
- CpuRegister(ensure_scratch.GetRegister()));
+ int temp_reg = ensure_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ // Use the stack as a temporary.
+ // Save mem1 on the stack.
+ __ pushq(Address(CpuRegister(RSP), mem1));
+
+ // Copy mem2 into mem1.
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize));
+ __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP));
+
+ // Now pop mem1 into mem2.
+ __ popq(Address(CpuRegister(RSP), mem2));
+ } else {
+ CpuRegister temp = CpuRegister(temp_reg);
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1));
+ __ movq(temp, Address(CpuRegister(RSP), mem2));
+ __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP));
+ __ movq(Address(CpuRegister(RSP), mem1), temp);
+ }
}
void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
@@ -3685,6 +3850,13 @@ void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
__ movd(reg, CpuRegister(TMP));
}
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
+ // Prefer to avoid xchg as it isn't speedy on smaller processors.
+ __ movq(CpuRegister(TMP), reg1);
+ __ movq(reg1, reg2);
+ __ movq(reg2, CpuRegister(TMP));
+}
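// Unlike the x86-32 resolver, x86-64 reserves TMP outside the allocatable
// set, so a register-register swap never needs a scratch scope or a stack
// fallback (a note on the asymmetry, not part of this change).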
+
void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
__ movsd(Address(CpuRegister(RSP), mem), reg);
@@ -3697,7 +3869,7 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+ Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
@@ -4062,5 +4234,66 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
LOG(FATAL) << "Unreachable";
}
+void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+ // Generate the constant area if needed.
+ X86_64Assembler* assembler = GetAssembler();
+ if (!assembler->IsConstantAreaEmpty()) {
+    // Align to a 4-byte boundary to reduce cache misses, as the data is 4- and
+    // 8-byte values. If used for vectors at a later time, this will need to be
+    // updated to 16-byte alignment with the appropriate offset.
+ assembler->Align(4, 0);
+ constant_area_start_ = assembler->CodeSize();
+ assembler->AddConstantArea();
+ }
+
+ // And finish up.
+ CodeGenerator::Finalize(allocator);
+}
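// Resulting layout sketch (illustration): the constant area is appended after
// the last instruction, 4-byte aligned, and constant_area_start_ records its
// offset so RIP-relative fixups can be resolved during Finalize:
//
//   [ method code ... ][ pad to 4 ][ 8- and 4-byte literals ... ]
//                                  ^ constant_area_start_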
+
+/**
+ * Class to handle late fixup of offsets into constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+ public:
+ RIPFixup(const CodeGeneratorX86_64& codegen, int offset)
+ : codegen_(codegen), offset_into_constant_area_(offset) {}
+
+ private:
+ void Process(const MemoryRegion& region, int pos) OVERRIDE {
+ // Patch the correct offset for the instruction. We use the address of the
+ // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+ int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
+ int relative_position = constant_offset - pos;
+
+ // Patch in the right value.
+ region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+ }
+
+ const CodeGeneratorX86_64& codegen_;
+
+ // Location in constant area that the fixup refers to.
+ int offset_into_constant_area_;
+};
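// Worked example of the fixup arithmetic (illustration, made-up numbers):
// say the constant area starts at code offset 0x200, the literal sits +0x10
// into it, and the 4-byte displacement ends at pos = 0x120. RIP-relative
// addressing is relative to the next instruction, i.e. pos, so:
//
//   relative_position = (0x200 + 0x10) - 0x120 = 0xF0
//
// and 0xF0 is stored at pos - 4, the displacement's own location.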
+
+Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
+ return Address::RIP(fixup);
+}
+
} // namespace x86_64
} // namespace art
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index f6fbc2e6bc..61bf6ac71d 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -118,6 +118,7 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
void Exchange32(CpuRegister reg, int mem);
void Exchange32(XmmRegister reg, int mem);
void Exchange32(int mem1, int mem2);
+ void Exchange64(CpuRegister reg1, CpuRegister reg2);
void Exchange64(CpuRegister reg, int mem);
void Exchange64(XmmRegister reg, int mem);
void Exchange64(int mem1, int mem2);
@@ -185,6 +186,10 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
void GenerateExplicitNullCheck(HNullCheck* instruction);
void PushOntoFPStack(Location source, uint32_t temp_offset,
uint32_t stack_adjustment, bool is_float);
+ void GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target);
X86_64Assembler* const assembler_;
CodeGeneratorX86_64* const codegen_;
@@ -194,7 +199,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
class CodeGeneratorX86_64 : public CodeGenerator {
public:
- CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options);
+ CodeGeneratorX86_64(HGraph* graph,
+ const X86_64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options);
virtual ~CodeGeneratorX86_64() {}
void GenerateFrameEntry() OVERRIDE;
@@ -240,6 +247,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void Finalize(CodeAllocator* allocator) OVERRIDE;
InstructionSet GetInstructionSet() const OVERRIDE {
return InstructionSet::kX86_64;
@@ -267,6 +275,19 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp);
+ const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const {
+ return isa_features_;
+ }
+
+ int ConstantAreaStart() const {
+ return constant_area_start_;
+ }
+
+ Address LiteralDoubleAddress(double v);
+ Address LiteralFloatAddress(float v);
+ Address LiteralInt32Address(int32_t v);
+ Address LiteralInt64Address(int64_t v);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -275,6 +296,11 @@ class CodeGeneratorX86_64 : public CodeGenerator {
InstructionCodeGeneratorX86_64 instruction_visitor_;
ParallelMoveResolverX86_64 move_resolver_;
X86_64Assembler assembler_;
+ const X86_64InstructionSetFeatures& isa_features_;
+
+ // Offset to the start of the constant area in the assembled code.
+ // Used for fixups to the constant area.
+ int constant_area_start_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 6053ad51f4..2be117bf38 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -19,6 +19,8 @@
#include "arch/instruction_set.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/x86/instruction_set_features_x86.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "builder.h"
#include "code_generator_arm.h"
@@ -108,7 +110,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
InternalCodeAllocator allocator;
CompilerOptions compiler_options;
- x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
// We avoid doing a stack overflow check that requires the runtime being setup,
// by making sure the compiler knows the methods we are running are leaf methods.
codegenX86.CompileBaseline(&allocator, true);
@@ -124,7 +128,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
Run(allocator, codegenARM, has_result, expected);
}
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options);
+ std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
+ X86_64InstructionSetFeatures::FromCppDefines());
+ x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
codegenX86_64.CompileBaseline(&allocator, true);
if (kRuntimeISA == kX86_64) {
Run(allocator, codegenX86_64, has_result, expected);
@@ -175,10 +181,14 @@ static void RunCodeOptimized(HGraph* graph,
compiler_options);
RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86) {
- x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86_64) {
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options);
+ std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
+ X86_64InstructionSetFeatures::FromCppDefines());
+ x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
}
}
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index fd8c0c6242..966165bf4c 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -20,8 +20,8 @@
#include "locations.h"
#include "nodes.h"
#include "utils/arm64/assembler_arm64.h"
-#include "a64/disasm-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
namespace art {
namespace arm64 {
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 6853d54c48..02ad675dc3 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -16,6 +16,7 @@
#include <functional>
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
@@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data,
std::string actual_before = printer_before.str();
ASSERT_EQ(expected_before, actual_before);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
HConstantFolding(graph).Run();
SSAChecker ssa_checker_cf(&allocator, graph);
ssa_checker_cf.Run();
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index a644719622..98ae1ec5d3 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "dead_code_elimination.h"
#include "driver/compiler_options.h"
@@ -40,7 +41,9 @@ static void TestCode(const uint16_t* data,
std::string actual_before = printer_before.str();
ASSERT_EQ(actual_before, expected_before);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
HDeadCodeElimination(graph).Run();
SSAChecker ssa_checker(&allocator, graph);
ssa_checker.Run();
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 49c0d3884f..4c283788b5 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -337,13 +337,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
HGraphVisualizer::HGraphVisualizer(std::ostream* output,
HGraph* graph,
- const CodeGenerator& codegen,
- const char* method_name)
- : output_(output), graph_(graph), codegen_(codegen) {
- if (output == nullptr) {
- return;
- }
+ const CodeGenerator& codegen)
+ : output_(output), graph_(graph), codegen_(codegen) {}
+void HGraphVisualizer::PrintHeader(const char* method_name) const {
+ DCHECK(output_ != nullptr);
HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_);
printer.StartTag("compilation");
printer.PrintProperty("name", method_name);
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index bc553aed74..513bceb369 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -35,9 +35,9 @@ class HGraphVisualizer : public ValueObject {
public:
HGraphVisualizer(std::ostream* output,
HGraph* graph,
- const CodeGenerator& codegen,
- const char* method_name);
+ const CodeGenerator& codegen);
+ void PrintHeader(const char* method_name) const;
void DumpGraph(const char* pass_name, bool is_after_pass = true) const;
private:
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 628a844cc7..20aa45f197 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -90,7 +90,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
case kIntrinsicReverseBytes:
switch (GetType(method.d.data, true)) {
case Primitive::kPrimShort:
@@ -103,7 +102,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
// Abs.
case kIntrinsicAbsDouble:
@@ -166,7 +164,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
// Memory.poke.
case kIntrinsicPoke:
@@ -183,7 +180,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
// String.
case kIntrinsicCharAt:
@@ -211,7 +207,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
case kIntrinsicUnsafeGet: {
const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile);
switch (GetType(method.d.data, false)) {
@@ -225,7 +220,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
}
case kIntrinsicUnsafePut: {
enum Sync { kNoSync, kVolatile, kOrdered };
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 33176f009c..94e27e912e 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -776,10 +776,10 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ mov(out, ShifterOperand(0), CC);
}
-void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) {
+void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
-void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) {
+void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 72d303c870..d1176c460f 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -28,8 +28,8 @@
#include "utils/arm64/assembler_arm64.h"
#include "utils/arm64/constants_arm64.h"
-#include "a64/disasm-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
using namespace vixl; // NOLINT(build/namespaces)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 384737f55a..aec2d19b1d 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -16,6 +16,7 @@
#include "intrinsics_x86.h"
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
@@ -34,6 +35,11 @@ static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int kFloatNaN = 0x7FC00000;
+IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
+ : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
+}
+
+
return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
}
@@ -152,6 +158,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
+ RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
} else {
UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
UNREACHABLE();
@@ -313,6 +320,27 @@ void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}
+void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
+ CreateLongToLongLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location input = locations->InAt(0);
+ Register input_lo = input.AsRegisterPairLow<Register>();
+ Register input_hi = input.AsRegisterPairHigh<Register>();
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+
+ X86Assembler* assembler = GetAssembler();
+ // Assign the inputs to the outputs, mixing low/high.
+ __ movl(output_lo, input_hi);
+ __ movl(output_hi, input_lo);
+ __ bswapl(output_lo);
+ __ bswapl(output_hi);
+}
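// Standalone check of the swap-halves-then-bswap trick (illustration;
// __builtin_bswap models the bswapl instruction on GCC/Clang):
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     uint64_t v = 0x0123456789ABCDEFULL;
//     uint32_t out_lo = __builtin_bswap32(static_cast<uint32_t>(v >> 32));
//     uint32_t out_hi = __builtin_bswap32(static_cast<uint32_t>(v));
//     assert(((uint64_t{out_hi} << 32) | out_lo) == __builtin_bswap64(v));
//     return 0;
//   }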
+
void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
CreateIntToIntLocations(arena_, invoke);
}
@@ -719,6 +747,149 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
GetAssembler()->sqrtsd(out, in);
}
+static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
+ MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
+ codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+
+ // Copy the result back to the expected output.
+ Location out = invoke->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister());
+ MoveFromReturnRegister(out, invoke->GetType(), codegen);
+ }
+}
+
+static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ CodeGeneratorX86* codegen) {
+ // Do we have instruction support?
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
+ CreateFPToFPLocations(arena, invoke);
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+ // Needs to be EAX for the invoke.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
+ HInvoke* invoke,
+ X86Assembler* assembler,
+ int round_mode) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen, invoke);
+ } else {
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ __ roundsd(out, in, Immediate(round_mode));
+ }
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
+}
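
The immediate passed to roundsd selects the SSE4.1 rounding mode: the visitors
above use 2, 1 and 0 for ceil, floor and rint respectively. A libm-based sketch
of that mapping (an illustration, not ART code):

```cpp
#include <cmath>

// SSE4.1 roundsd/roundss immediate semantics, modeled with libm.
double Round(double in, int round_mode) {
  switch (round_mode) {
    case 0: return std::nearbyint(in);  // Round to nearest even: Math.rint.
    case 1: return std::floor(in);      // Round toward -infinity: Math.floor.
    case 2: return std::ceil(in);       // Round toward +infinity: Math.ceil.
    default: return std::trunc(in);     // Round toward zero (unused here).
  }
}
```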
+
+// Note that 32-bit x86 cannot inline MathRoundDouble, as that requires
+// 64-bit instructions.
+void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
+ // Do we have instruction support?
+ if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::RegisterLocation(EAX));
+ // Needs to be EAX for the invoke.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen_, invoke);
+ return;
+ }
+
+ // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ Register out = locations->Out().AsRegister<Register>();
+ XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+ X86Assembler* assembler = GetAssembler();
+
+ // Generate 0.5 into inPlusPointFive.
+ __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ movd(inPlusPointFive, out);
+
+ // Add in the input.
+ __ addss(inPlusPointFive, in);
+
+ // And floor to an integer (rounding mode 1 rounds toward negative infinity).
+ __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
+
+ __ movl(out, Immediate(kPrimIntMax));
+ // maxInt = int-to-float(out)
+ __ cvtsi2ss(maxInt, out);
+
+ // if inPlusPointFive >= maxInt goto done
+ __ comiss(inPlusPointFive, maxInt);
+ __ j(kAboveEqual, &done);
+
+ // if input is NaN goto nan
+ __ j(kUnordered, &nan);
+
+ // output = float-to-int-truncate(inPlusPointFive)
+ __ cvttss2si(out, inPlusPointFive);
+ __ jmp(&done);
+ __ Bind(&nan);
+
+ // output = 0
+ __ xorl(out, out);
+ __ Bind(&done);
+}
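
Taken together, the inline path implements Java's Math.round(float) contract:
NaN maps to 0, values at or above Integer.MAX_VALUE clamp to it, and everything
else is floor(in + 0.5f) truncated to int. A scalar model of the emitted
sequence (a sketch, not ART code):

```cpp
#include <cmath>
#include <cstdint>
#include <limits>

int32_t RoundFloat(float in) {
  float t = std::floor(in + 0.5f);  // addss + roundss with mode 1.
  if (std::isnan(in)) {
    return 0;                       // The kUnordered branch to `nan`.
  }
  if (t >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();  // `out` still holds kPrimIntMax.
  }
  return static_cast<int32_t>(t);   // cvttss2si.
}
```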
+
void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1180,6 +1351,181 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Offset is a long, but in 32-bit mode, we only need the low word.
+ // TODO: can we update the invoke here to remove a TypeConvert to Long?
+ locations->SetInAt(2, Location::RequiresRegister());
+ // Expected value must be in EAX or EDX:EAX.
+ // For long, new value must be in ECX:EBX.
+ if (type == Primitive::kPrimLong) {
+ locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
+ locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
+ } else {
+ locations->SetInAt(3, Location::RegisterLocation(EAX));
+ locations->SetInAt(4, Location::RequiresRegister());
+ }
+
+ // Force a byte register for the output.
+ locations->SetOut(Location::RegisterLocation(EAX));
+ if (type == Primitive::kPrimNot) {
+ // Need temp registers for card-marking.
+ locations->AddTemp(Location::RequiresRegister());
+ // Need a byte register for marking.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ }
+}
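
The fixed pairs are dictated by the hardware: lock cmpxchg8b always takes the
expected value in EDX:EAX and the new value in ECX:EBX. Compilers lower the
equivalent builtin the same way on 32-bit x86:

```cpp
#include <cstdint>

// On 32-bit x86, GCC/Clang compile this builtin to lock cmpxchg8b, pinning
// `expected` to EDX:EAX and `desired` to ECX:EBX, the same pairs the
// LocationSummary above reserves.
bool Cas64(volatile int64_t* addr, int64_t expected, int64_t desired) {
  return __sync_bool_compare_and_swap(addr, expected, desired);
}
```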
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
+ X86Assembler* assembler =
+ reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register base = locations->InAt(1).AsRegister<Register>();
+ Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
+ Location out = locations->Out();
+ DCHECK_EQ(out.AsRegister<Register>(), EAX);
+
+ if (type == Primitive::kPrimLong) {
+ DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
+ DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
+ DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
+ DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
+ __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
+ } else {
+ // Integer or object.
+ DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
+ Register value = locations->InAt(4).AsRegister<Register>();
+ if (type == Primitive::kPrimNot) {
+ // Mark card for object assuming new value is stored.
+ codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
+ locations->GetTemp(1).AsRegister<Register>(),
+ base,
+ value);
+ }
+
+ __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+ }
+
+ // Locked cmpxchg has full barrier semantics, so we do not need additional
+ // scheduling barriers here.
+
+ // Convert ZF into the boolean result.
+ __ setb(kZero, out.AsRegister<Register>());
+ __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
+}
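
The trailing setb/movzxb pair materializes ZF, which lock cmpxchg sets exactly
when the exchange succeeded, as the method's boolean result. The same semantics
modeled with std::atomic (a sketch, not ART code):

```cpp
#include <atomic>
#include <cstdint>

int32_t CasInt(std::atomic<int32_t>* addr, int32_t expected, int32_t desired) {
  // compare_exchange_strong performs the locked cmpxchg; its boolean result
  // corresponds to the ZF that setb/movzxb widen into the int return value.
  return addr->compare_exchange_strong(expected, desired) ? 1 : 0;
}
```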
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
+ X86Assembler* assembler) {
+ Immediate imm_shift(shift);
+ Immediate imm_mask(mask);
+ __ movl(temp, reg);
+ __ shrl(reg, imm_shift);
+ __ andl(temp, imm_mask);
+ __ andl(reg, imm_mask);
+ __ shll(temp, imm_shift);
+ __ orl(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
+ X86Assembler* assembler =
+ reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register reg = locations->InAt(0).AsRegister<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+ /*
+ * Use one bswap instruction to reverse byte order first, then use 3 rounds of
+ * bit swaps to reverse the bits of the number x. Using bswap saves
+ * instructions over the generic luni implementation, which needs 5 rounds of bit swaps.
+ * x = bswap x
+ * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+ * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+ * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+ */
+ __ bswapl(reg);
+ SwapBits(reg, temp, 1, 0x55555555, assembler);
+ SwapBits(reg, temp, 2, 0x33333333, assembler);
+ SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
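
The three SwapBits rounds after bswapl implement the classic logarithmic bit
reversal. A scalar equivalent (an illustration, not ART code):

```cpp
#include <cstdint>

uint32_t Reverse32(uint32_t x) {
  x = __builtin_bswap32(x);                               // Reverse bytes first.
  x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555);  // Swap adjacent bits.
  x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333);  // Swap bit pairs.
  x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F);  // Swap nibbles.
  return x;
}
```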
+
+void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
+ X86Assembler* assembler =
+ reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+ // We want to swap the high and low words, bswap each, and then perform the
+ // same bit reversal as in the 32-bit case.
+ // Exchange high and low.
+ __ movl(temp, reg_low);
+ __ movl(reg_low, reg_high);
+ __ movl(reg_high, temp);
+
+ // bit-reverse low
+ __ bswapl(reg_low);
+ SwapBits(reg_low, temp, 1, 0x55555555, assembler);
+ SwapBits(reg_low, temp, 2, 0x33333333, assembler);
+ SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
+
+ // bit-reverse high
+ __ bswapl(reg_high);
+ SwapBits(reg_high, temp, 1, 0x55555555, assembler);
+ SwapBits(reg_high, temp, 2, 0x33333333, assembler);
+ SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
+}
+
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -1188,20 +1534,10 @@ void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE
void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
-UNIMPLEMENTED_INTRINSIC(MathFloor)
-UNIMPLEMENTED_INTRINSIC(MathCeil)
-UNIMPLEMENTED_INTRINSIC(MathRint)
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
} // namespace x86
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index e1e8260a5f..4292ec7b99 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -32,7 +32,7 @@ class X86Assembler;
class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor {
public:
- explicit IntrinsicLocationsBuilderX86(ArenaAllocator* arena) : arena_(arena) {}
+ explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen);
// Define visitor methods.
@@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGeneratorX86* codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86);
};
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 736cea88cb..cbf94f0f81 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -16,6 +16,7 @@
#include "intrinsics_x86_64.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
@@ -30,6 +31,11 @@ namespace art {
namespace x86_64 {
+IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
+ : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
+}
+
+
X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}
@@ -292,25 +298,27 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke)
// TODO: Allow x86 to work with memory. This requires assembler support, see below.
// locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
- locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above.
+ locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask.
}
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations,
+ bool is64bit,
+ X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen) {
Location output = locations->Out();
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
if (output.IsFpuRegister()) {
// In-register
- XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ // TODO: We could mask directly against the constant area using pand if we
+ // could guarantee that the literal is aligned on a 16-byte boundary. That
+ // would avoid the temporary.
if (is64bit) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
__ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
__ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
}
} else {
@@ -335,7 +343,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -343,7 +351,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}
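
MathAbsFP now loads the sign-clearing mask straight from the constant area
(LiteralInt64Address/LiteralInt32Address) instead of staging it through a
general-purpose register. At the bit level, the andpd/andps does no more than
this (a sketch, not ART code):

```cpp
#include <cstdint>
#include <cstring>

double AbsDouble(double in) {
  uint64_t bits;
  std::memcpy(&bits, &in, sizeof(bits));
  bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);  // Clear only the sign bit.
  double out;
  std::memcpy(&out, &bits, sizeof(out));
  return out;
}
```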
static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -393,8 +401,11 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}
-static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
- X86_64Assembler* assembler) {
+static void GenMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ bool is_double,
+ X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen) {
Location op1_loc = locations->InAt(0);
Location op2_loc = locations->InAt(1);
Location out_loc = locations->Out();
@@ -421,7 +432,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
//
// This removes one jmp, but needs to copy one input (op1) to out.
//
- // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+ // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
@@ -455,14 +466,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
// NaN handling.
__ Bind(&nan);
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
if (is_double) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+ __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+ __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
}
- __ movd(out, cpu_temp, is_double);
__ jmp(&done);
// out := op2;
@@ -477,7 +485,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
__ Bind(&done);
}
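
The NaN arm of GenMinMaxFP likewise takes the canonical quiet-NaN pattern from
the constant area. For reference, the Java semantics the whole sequence
implements, and the reason a bare minsd/maxsd is insufficient, sketched in
plain C++ (min over doubles):

```cpp
#include <cmath>
#include <limits>

double JavaMinDouble(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    // Java returns the canonical NaN, 0x7FF8000000000000.
    return std::numeric_limits<double>::quiet_NaN();
  }
  if (a == 0.0 && b == 0.0) {
    return std::signbit(a) ? a : b;  // -0.0 is considered smaller than +0.0.
  }
  return a < b ? a : b;
}
```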
-static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
@@ -486,39 +494,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo
// The following is sub-optimal, but all we can do for now. It would be fine to also accept
// the second input to be the output (we can simply swap inputs).
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}
static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -614,6 +621,203 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
GetAssembler()->sqrtsd(out, in);
}
+static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+ MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
+ codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+
+ // Copy the result back to the expected output.
+ Location out = invoke->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister());
+ MoveFromReturnRegister(out, invoke->GetType(), codegen);
+ }
+}
+
+static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ CodeGeneratorX86_64* codegen) {
+ // Do we have instruction support?
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
+ CreateFPToFPLocations(arena, invoke);
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+ // Needs to be RDI for the invoke.
+ locations->AddTemp(Location::RegisterLocation(RDI));
+}
+
+static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
+ HInvoke* invoke,
+ X86_64Assembler* assembler,
+ int round_mode) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen, invoke);
+ } else {
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ __ roundsd(out, in, Immediate(round_mode));
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
+}
+
+static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ CodeGeneratorX86_64* codegen) {
+ // Do we have instruction support?
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::RegisterLocation(RAX));
+ // Needs to be RDI for the invoke.
+ locations->AddTemp(Location::RegisterLocation(RDI));
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen_, invoke);
+ return;
+ }
+
+ // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+ X86_64Assembler* assembler = GetAssembler();
+
+ // Generate 0.5 into inPlusPointFive.
+ __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ movd(inPlusPointFive, out, false);
+
+ // Add in the input.
+ __ addss(inPlusPointFive, in);
+
+ // And floor to an integer (rounding mode 1 rounds toward negative infinity).
+ __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
+
+ __ movl(out, Immediate(kPrimIntMax));
+ // maxInt = int-to-float(out)
+ __ cvtsi2ss(maxInt, out);
+
+ // if inPlusPointFive >= maxInt goto done
+ __ comiss(inPlusPointFive, maxInt);
+ __ j(kAboveEqual, &done);
+
+ // if input is NaN goto nan
+ __ j(kUnordered, &nan);
+
+ // output = float-to-int-truncate(inPlusPointFive)
+ __ cvttss2si(out, inPlusPointFive);
+ __ jmp(&done);
+ __ Bind(&nan);
+
+ // output = 0
+ __ xorl(out, out);
+ __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen_, invoke);
+ return;
+ }
+
+ // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+ X86_64Assembler* assembler = GetAssembler();
+
+ // Generate 0.5 into inPlusPointFive.
+ __ movq(out, Immediate(bit_cast<int64_t, double>(0.5)));
+ __ movd(inPlusPointFive, out, true);
+
+ // Add in the input.
+ __ addsd(inPlusPointFive, in);
+
+ // And floor to an integer (rounding mode 1 rounds toward negative infinity).
+ __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
+
+ __ movq(out, Immediate(kPrimLongMax));
+ // maxLong = long-to-double(out)
+ __ cvtsi2sd(maxLong, out, true);
+
+ // if inPlusPointFive >= maxLong goto done
+ __ comisd(inPlusPointFive, maxLong);
+ __ j(kAboveEqual, &done);
+
+ // if input is NaN goto nan
+ __ j(kUnordered, &nan);
+
+ // output = double-to-long-truncate(inPlusPointFive)
+ __ cvttsd2si(out, inPlusPointFive, true);
+ __ jmp(&done);
+ __ Bind(&nan);
+
+ // output = 0
+ __ xorq(out, out);
+ __ Bind(&done);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -999,6 +1203,175 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ // Expected value must be in EAX/RAX.
+ locations->SetInAt(3, Location::RegisterLocation(RAX));
+ locations->SetInAt(4, Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister());
+ if (type == Primitive::kPrimNot) {
+ // Need temp registers for card-marking.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ X86_64Assembler* assembler =
+ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+ CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
+ DCHECK_EQ(expected.AsRegister(), RAX);
+ CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ if (type == Primitive::kPrimLong) {
+ __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
+ } else {
+ // Integer or object.
+ if (type == Primitive::kPrimNot) {
+ // Mark card for object assuming new value is stored.
+ codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+ locations->GetTemp(1).AsRegister<CpuRegister>(),
+ base,
+ value);
+ }
+
+ __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+ }
+
+ // Locked cmpxchg has full barrier semantics, so we do not need additional
+ // scheduling barriers here.
+
+ // Convert ZF into the boolean result.
+ __ setcc(kZero, out);
+ __ movzxb(out, out);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
+ X86_64Assembler* assembler) {
+ Immediate imm_shift(shift);
+ Immediate imm_mask(mask);
+ __ movl(temp, reg);
+ __ shrl(reg, imm_shift);
+ __ andl(temp, imm_mask);
+ __ andl(reg, imm_mask);
+ __ shll(temp, imm_shift);
+ __ orl(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
+ X86_64Assembler* assembler =
+ reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ /*
+ * Use one bswap instruction to reverse byte order first, then use 3 rounds of
+ * bit swaps to reverse the bits of the number x. Using bswap saves
+ * instructions over the generic luni implementation, which needs 5 rounds of bit swaps.
+ * x = bswap x
+ * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+ * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+ * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+ */
+ __ bswapl(reg);
+ SwapBits(reg, temp, 1, 0x55555555, assembler);
+ SwapBits(reg, temp, 2, 0x33333333, assembler);
+ SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
+ int32_t shift, int64_t mask, X86_64Assembler* assembler) {
+ Immediate imm_shift(shift);
+ __ movq(temp_mask, Immediate(mask));
+ __ movq(temp, reg);
+ __ shrq(reg, imm_shift);
+ __ andq(temp, temp_mask);
+ __ andq(reg, temp_mask);
+ __ shlq(temp, imm_shift);
+ __ orq(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
+ X86_64Assembler* assembler =
+ reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+
+ /*
+ * Use one bswap instruction to reverse byte order first, then use 3 rounds of
+ * bit swaps to reverse the bits of the long number x. Using bswap saves
+ * instructions over the generic luni implementation, which needs 5 rounds of bit swaps.
+ * x = bswap x
+ * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
+ * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
+ * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
+ */
+ __ bswapq(reg);
+ SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
+ SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
+ SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
+}
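
The 64-bit variant is the same scheme widened to quadwords. Since the 64-bit
masks do not fit in an x86-64 immediate, SwapBits64 stages them through the
second temp register. A scalar sketch (not ART code):

```cpp
#include <cstdint>

uint64_t Reverse64(uint64_t x) {
  x = __builtin_bswap64(x);
  x = ((x & UINT64_C(0x5555555555555555)) << 1) | ((x >> 1) & UINT64_C(0x5555555555555555));
  x = ((x & UINT64_C(0x3333333333333333)) << 2) | ((x >> 2) & UINT64_C(0x3333333333333333));
  x = ((x & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4) | ((x >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F));
  return x;
}
```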
+
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -1007,19 +1380,9 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(MathFloor)
-UNIMPLEMENTED_INTRINSIC(MathCeil)
-UNIMPLEMENTED_INTRINSIC(MathRint)
-UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
} // namespace x86_64
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index dfae7fa90e..0e0e72c1fc 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -32,7 +32,7 @@ class X86_64Assembler;
class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
public:
- explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {}
+ explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen);
// Define visitor methods.
@@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGeneratorX86_64* codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64);
};
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index f22b7a7e82..28c5555d57 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -16,6 +16,7 @@
#include <fstream>
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
@@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num
graph->TryBuildingSsa();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index c102c4f02f..61d6593f2b 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
@@ -65,7 +66,9 @@ TEST(LiveRangesTest, CFG1) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -111,7 +114,9 @@ TEST(LiveRangesTest, CFG2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -160,7 +165,9 @@ TEST(LiveRangesTest, CFG3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -237,7 +244,9 @@ TEST(LiveRangesTest, Loop1) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
RemoveSuspendChecks(graph);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -315,7 +324,9 @@ TEST(LiveRangesTest, Loop2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -391,7 +402,9 @@ TEST(LiveRangesTest, CFG4) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 0b0cfde0cf..81250ca133 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
@@ -53,7 +54,9 @@ static void TestCode(const uint16_t* data, const char* expected) {
graph->TryBuildingSsa();
// `Inline` conditions into ifs.
PrepareForRegisterAllocation(graph).Run();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index dca612e6b7..d8a8554610 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -752,8 +752,8 @@ HInstruction* HBinaryOperation::GetLeastConstantLeft() const {
}
}
-bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const {
- return this == if_->GetPreviousDisregardingMoves();
+bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
+ return this == instruction->GetPreviousDisregardingMoves();
}
bool HInstruction::Equals(HInstruction* other) const {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 21ed3504f1..f764eb421f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -682,6 +682,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(ClinitCheck, Instruction) \
M(Compare, BinaryOperation) \
M(Condition, BinaryOperation) \
+ M(Deoptimize, Instruction) \
M(Div, BinaryOperation) \
M(DivZeroCheck, Instruction) \
M(DoubleConstant, Constant) \
@@ -1191,7 +1192,17 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
bool HasEnvironment() const { return environment_ != nullptr; }
HEnvironment* GetEnvironment() const { return environment_; }
- void SetEnvironment(HEnvironment* environment) { environment_ = environment; }
+ // Set the `environment_` field. Raw because this method does not
+ // update the uses lists.
+ void SetRawEnvironment(HEnvironment* environment) { environment_ = environment; }
+
+ // Set the environment of this instruction, copying it from `environment`. While
+ // copying, the uses lists are being updated.
+ void CopyEnvironmentFrom(HEnvironment* environment) {
+ ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
+ environment_ = new (allocator) HEnvironment(allocator, environment->Size());
+ environment_->CopyFrom(environment);
+ }
// Returns the number of entries in the environment. Typically, that is the
// number of dex registers in a method. It could be more in case of inlining.
@@ -1544,12 +1555,31 @@ class HIf : public HTemplateInstruction<1> {
DECLARE_INSTRUCTION(If);
- virtual bool IsIfInstruction() const { return true; }
-
private:
DISALLOW_COPY_AND_ASSIGN(HIf);
};
+// Deoptimize to interpreter, upon checking a condition.
+class HDeoptimize : public HTemplateInstruction<1> {
+ public:
+ HDeoptimize(HInstruction* cond, uint32_t dex_pc)
+ : HTemplateInstruction(SideEffects::None()),
+ dex_pc_(dex_pc) {
+ SetRawInputAt(0, cond);
+ }
+
+ bool NeedsEnvironment() const OVERRIDE { return true; }
+ bool CanThrow() const OVERRIDE { return true; }
+ uint32_t GetDexPc() const { return dex_pc_; }
+
+ DECLARE_INSTRUCTION(Deoptimize);
+
+ private:
+ uint32_t dex_pc_;
+
+ DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
+};
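
A hypothetical use of the new node in an optimization pass (a sketch; the
helper, the insertion point and the dex_pc plumbing are illustrative, while the
HDeoptimize constructor and CopyEnvironmentFrom come from this change):

```cpp
// Guard a speculative assumption: if `condition` is true at runtime,
// execution bails out to the interpreter at `dex_pc`.
void GuardWithDeopt(HBasicBlock* block, HInstruction* condition,
                    HInstruction* instr_with_env, uint32_t dex_pc) {
  ArenaAllocator* arena = block->GetGraph()->GetArena();
  HDeoptimize* deopt = new (arena) HDeoptimize(condition, dex_pc);
  block->InsertInstructionBefore(deopt, block->GetLastInstruction());
  // NeedsEnvironment() is true, so give the deopt an environment, e.g.
  // copied from a neighboring instruction with the new helper.
  deopt->CopyEnvironmentFrom(instr_with_env->GetEnvironment());
}
```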
+
class HUnaryOperation : public HExpression<1> {
public:
HUnaryOperation(Primitive::Type result_type, HInstruction* input)
@@ -1667,8 +1697,8 @@ class HCondition : public HBinaryOperation {
void ClearNeedsMaterialization() { needs_materialization_ = false; }
// For code generation purposes, returns whether this instruction is just before
- // `if_`, and disregard moves in between.
- bool IsBeforeWhenDisregardMoves(HIf* if_) const;
+ // `instruction`, and disregard moves in between.
+ bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const;
DECLARE_INSTRUCTION(Condition);
@@ -2307,6 +2337,9 @@ class HNewArray : public HExpression<1> {
// Calls runtime so needs an environment.
bool NeedsEnvironment() const OVERRIDE { return true; }
+ // May throw NegativeArraySizeException, OutOfMemoryError, etc.
+ bool CanThrow() const OVERRIDE { return true; }
+
bool CanBeNull() const OVERRIDE { return false; }
QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 4cf22d3b2e..4e83ce576c 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -50,7 +50,7 @@ TEST(Node, RemoveInstruction) {
exit_block->AddInstruction(new (&allocator) HExit());
HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1);
- null_check->SetEnvironment(environment);
+ null_check->SetRawEnvironment(environment);
environment->SetRawEnvAt(0, parameter);
parameter->AddEnvUseAt(null_check->GetEnvironment(), 0);
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
new file mode 100644
index 0000000000..6d986ba7d3
--- /dev/null
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "cfi_test.h"
+#include "gtest/gtest.h"
+#include "optimizing/code_generator.h"
+#include "utils/assembler.h"
+
+#include "optimizing/optimizing_cfi_test_expected.inc"
+
+namespace art {
+
+// Run the tests only on host.
+#ifndef HAVE_ANDROID_OS
+
+class OptimizingCFITest : public CFITest {
+ public:
+ // Enable this flag to generate the expected outputs.
+ static constexpr bool kGenerateExpected = false;
+
+ void TestImpl(InstructionSet isa, const char* isa_str,
+ const std::vector<uint8_t>& expected_asm,
+ const std::vector<uint8_t>& expected_cfi) {
+ // Set up a simple context.
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ CompilerOptions opts;
+ std::unique_ptr<const InstructionSetFeatures> isa_features;
+ std::string error;
+ isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+ HGraph graph(&allocator);
+ // Generate simple frame with some spills.
+ std::unique_ptr<CodeGenerator> code_gen(
+ CodeGenerator::Create(&graph, isa, *isa_features.get(), opts));
+ const int frame_size = 64;
+ int core_reg = 0;
+ int fp_reg = 0;
+ for (int i = 0; i < 2; i++) { // Two registers of each kind.
+ for (; core_reg < 32; core_reg++) {
+ if (code_gen->IsCoreCalleeSaveRegister(core_reg)) {
+ auto location = Location::RegisterLocation(core_reg);
+ code_gen->AddAllocatedRegister(location);
+ core_reg++;
+ break;
+ }
+ }
+ for (; fp_reg < 32; fp_reg++) {
+ if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) {
+ auto location = Location::FpuRegisterLocation(fp_reg);
+ code_gen->AddAllocatedRegister(location);
+ fp_reg++;
+ break;
+ }
+ }
+ }
+ code_gen->ComputeSpillMask();
+ code_gen->SetFrameSize(frame_size);
+ code_gen->GenerateFrameEntry();
+ code_gen->GetInstructionVisitor()->VisitReturnVoid(new (&allocator) HReturnVoid());
+ // Get the outputs.
+ InternalCodeAllocator code_allocator;
+ code_gen->Finalize(&code_allocator);
+ const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory();
+ Assembler* opt_asm = code_gen->GetAssembler();
+ const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
+
+ if (kGenerateExpected) {
+ GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
+ } else {
+ EXPECT_EQ(expected_asm, actual_asm);
+ EXPECT_EQ(expected_cfi, actual_cfi);
+ }
+ }
+
+ private:
+ class InternalCodeAllocator : public CodeAllocator {
+ public:
+ InternalCodeAllocator() {}
+
+ virtual uint8_t* Allocate(size_t size) {
+ memory_.resize(size);
+ return memory_.data();
+ }
+
+ const std::vector<uint8_t>& GetMemory() { return memory_; }
+
+ private:
+ std::vector<uint8_t> memory_;
+
+ DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
+ };
+};
+
+#define TEST_ISA(isa) \
+ TEST_F(OptimizingCFITest, isa) { \
+ std::vector<uint8_t> expected_asm(expected_asm_##isa, \
+ expected_asm_##isa + arraysize(expected_asm_##isa)); \
+ std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
+ expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+ TestImpl(isa, #isa, expected_asm, expected_cfi); \
+ }
+
+TEST_ISA(kThumb2)
+TEST_ISA(kArm64)
+TEST_ISA(kX86)
+TEST_ISA(kX86_64)
+
+#endif // HAVE_ANDROID_OS
+
+} // namespace art
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
new file mode 100644
index 0000000000..2125f6eb01
--- /dev/null
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -0,0 +1,141 @@
+static constexpr uint8_t expected_asm_kThumb2[] = {
+ 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0,
+ 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD,
+};
+static constexpr uint8_t expected_cfi_kThumb2[] = {
+ 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+ 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42,
+ 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E,
+ 0x40,
+};
+// 0x00000000: push {r5, r6, lr}
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: .cfi_offset: r6 at cfa-8
+// 0x00000002: .cfi_offset: r14 at cfa-4
+// 0x00000002: vpush.f32 {s16-s17}
+// 0x00000006: .cfi_def_cfa_offset: 20
+// 0x00000006: .cfi_offset_extended: r80 at cfa-20
+// 0x00000006: .cfi_offset_extended: r81 at cfa-16
+// 0x00000006: sub sp, sp, #44
+// 0x00000008: .cfi_def_cfa_offset: 64
+// 0x00000008: str r0, [sp, #0]
+// 0x0000000a: .cfi_remember_state
+// 0x0000000a: add sp, sp, #44
+// 0x0000000c: .cfi_def_cfa_offset: 20
+// 0x0000000c: vpop.f32 {s16-s17}
+// 0x00000010: .cfi_def_cfa_offset: 12
+// 0x00000010: .cfi_restore_extended: r80
+// 0x00000010: .cfi_restore_extended: r81
+// 0x00000010: pop {r5, r6, pc}
+// 0x00000012: .cfi_restore_state
+// 0x00000012: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kArm64[] = {
+ 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
+ 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9,
+ 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+};
+static constexpr uint8_t expected_cfi_kArm64[] = {
+ 0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44,
+ 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
+ 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: str w0, [sp, #-64]!
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: stp x19, x20, [sp, #40]
+// 0x00000008: .cfi_offset: r19 at cfa-24
+// 0x00000008: .cfi_offset: r20 at cfa-16
+// 0x00000008: str lr, [sp, #56]
+// 0x0000000c: .cfi_offset: r30 at cfa-8
+// 0x0000000c: stp d8, d9, [sp, #24]
+// 0x00000010: .cfi_offset_extended: r72 at cfa-40
+// 0x00000010: .cfi_offset_extended: r73 at cfa-32
+// 0x00000010: .cfi_remember_state
+// 0x00000010: ldp d8, d9, [sp, #24]
+// 0x00000014: .cfi_restore_extended: r72
+// 0x00000014: .cfi_restore_extended: r73
+// 0x00000014: ldp x19, x20, [sp, #40]
+// 0x00000018: .cfi_restore: r19
+// 0x00000018: .cfi_restore: r20
+// 0x00000018: ldr lr, [sp, #56]
+// 0x0000001c: .cfi_restore: r30
+// 0x0000001c: add sp, sp, #0x40 (64)
+// 0x00000020: .cfi_def_cfa_offset: 0
+// 0x00000020: ret
+// 0x00000024: .cfi_restore_state
+// 0x00000024: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86[] = {
+ 0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D,
+ 0x5E, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86[] = {
+ 0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E,
+ 0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E,
+ 0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: push esi
+// 0x00000001: .cfi_def_cfa_offset: 8
+// 0x00000001: .cfi_offset: r6 at cfa-8
+// 0x00000001: push ebp
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: sub esp, 52
+// 0x00000005: .cfi_def_cfa_offset: 64
+// 0x00000005: mov [esp], eax
+// 0x00000008: .cfi_remember_state
+// 0x00000008: add esp, 52
+// 0x0000000b: .cfi_def_cfa_offset: 12
+// 0x0000000b: pop ebp
+// 0x0000000c: .cfi_def_cfa_offset: 8
+// 0x0000000c: .cfi_restore: r5
+// 0x0000000c: pop esi
+// 0x0000000d: .cfi_def_cfa_offset: 4
+// 0x0000000d: .cfi_restore: r6
+// 0x0000000d: ret
+// 0x0000000e: .cfi_restore_state
+// 0x0000000e: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86_64[] = {
+ 0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24,
+ 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2,
+ 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24,
+ 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86_64[] = {
+ 0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E,
+ 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47,
+ 0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6,
+ 0x41, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: push rbp
+// 0x00000001: .cfi_def_cfa_offset: 16
+// 0x00000001: .cfi_offset: r6 at cfa-16
+// 0x00000001: push rbx
+// 0x00000002: .cfi_def_cfa_offset: 24
+// 0x00000002: .cfi_offset: r3 at cfa-24
+// 0x00000002: subq rsp, 40
+// 0x00000006: .cfi_def_cfa_offset: 64
+// 0x00000006: movsd [rsp + 32], xmm13
+// 0x0000000d: .cfi_offset: r30 at cfa-32
+// 0x0000000d: movsd [rsp + 24], xmm12
+// 0x00000014: .cfi_offset: r29 at cfa-40
+// 0x00000014: mov [rsp], edi
+// 0x00000017: .cfi_remember_state
+// 0x00000017: movsd xmm12, [rsp + 24]
+// 0x0000001e: .cfi_restore: r29
+// 0x0000001e: movsd xmm13, [rsp + 32]
+// 0x00000025: .cfi_restore: r30
+// 0x00000025: addq rsp, 40
+// 0x00000029: .cfi_def_cfa_offset: 24
+// 0x00000029: pop rbx
+// 0x0000002a: .cfi_def_cfa_offset: 16
+// 0x0000002a: .cfi_restore: r3
+// 0x0000002a: pop rbp
+// 0x0000002b: .cfi_def_cfa_offset: 8
+// 0x0000002b: .cfi_restore: r6
+// 0x0000002b: ret
+// 0x0000002c: .cfi_restore_state
+// 0x0000002c: .cfi_def_cfa_offset: 64
+
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b2f9c65153..0e02212867 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -26,11 +26,13 @@
#include "bounds_check_elimination.h"
#include "builder.h"
#include "code_generator.h"
+#include "compiled_method.h"
#include "compiler.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
#include "elf_writer_quick.h"
#include "graph_visualizer.h"
@@ -48,6 +50,7 @@
#include "ssa_builder.h"
#include "ssa_phi_elimination.h"
#include "ssa_liveness_analysis.h"
+#include "utils/assembler.h"
#include "reference_type_propagation.h"
namespace art {
@@ -94,10 +97,13 @@ class PassInfoPrinter : public ValueObject {
timing_logger_enabled_(compiler_driver->GetDumpPasses()),
timing_logger_(method_name, true, true),
visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
- visualizer_(visualizer_output, graph, codegen, method_name_) {
+ visualizer_(visualizer_output, graph, codegen) {
if (strstr(method_name, kStringFilter) == nullptr) {
timing_logger_enabled_ = visualizer_enabled_ = false;
}
+ if (visualizer_enabled_) {
+ visualizer_.PrintHeader(method_name_);
+ }
}
~PassInfoPrinter() {
@@ -199,8 +205,13 @@ class OptimizingCompiler FINAL : public Compiler {
const std::vector<const art::DexFile*>& dex_files,
const std::string& android_root,
bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
+ if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) {
+ return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host,
+ *GetCompilerDriver());
+ } else {
+ return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
+ *GetCompilerDriver());
+ }
}
void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
@@ -360,6 +371,9 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) {
return ArrayRef<const uint8_t>(vector);
}
+// TODO: The function below uses too much stack space.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
CodeGenerator* codegen,
@@ -385,12 +399,17 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
CodeVectorAllocator allocator;
codegen->CompileOptimized(&allocator);
+ DefaultSrcMap src_mapping_table;
+ if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) {
+ codegen->BuildSourceMap(&src_mapping_table);
+ }
+
std::vector<uint8_t> stack_map;
codegen->BuildStackMaps(&stack_map);
compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);
- return CompiledMethod::SwapAllocCompiledMethodStackMap(
+ return CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
codegen->GetInstructionSet(),
ArrayRef<const uint8_t>(allocator.GetMemory()),
@@ -400,9 +419,15 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
- ArrayRef<const uint8_t>(stack_map));
+ &src_mapping_table,
+ ArrayRef<const uint8_t>(), // mapping_table.
+ ArrayRef<const uint8_t>(stack_map),
+ ArrayRef<const uint8_t>(), // native_gc_map.
+ ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+ ArrayRef<const LinkerPatch>());
}
+#pragma GCC diagnostic pop
CompiledMethod* OptimizingCompiler::CompileBaseline(
CodeGenerator* codegen,
@@ -412,9 +437,11 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
codegen->CompileBaseline(&allocator);
std::vector<uint8_t> mapping_table;
+ codegen->BuildMappingTable(&mapping_table);
DefaultSrcMap src_mapping_table;
- bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols();
- codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr);
+ if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) {
+ codegen->BuildSourceMap(&src_mapping_table);
+ }
std::vector<uint8_t> vmap_table;
codegen->BuildVMapTable(&vmap_table);
std::vector<uint8_t> gc_map;
@@ -435,7 +462,8 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
AlignVectorSize(mapping_table),
AlignVectorSize(vmap_table),
AlignVectorSize(gc_map),
- ArrayRef<const uint8_t>());
+ ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+ ArrayRef<const LinkerPatch>());
}
CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
@@ -501,6 +529,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
return nullptr;
}
+ codegen->GetAssembler()->cfi().SetEnabled(
+ compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols());
PassInfoPrinter pass_info_printer(graph,
method_name.c_str(),
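
Note how the last hunk ties the earlier ones together: the assembler's CFI
collection is switched on only when GetIncludeDebugSymbols() is set, and both
CompileOptimized() and CompileBaseline() now forward cfi().data() into the
CompiledMethod. Presumably a disabled CFI builder records nothing, so the extra
ArrayRef stays empty and non-debug compiles pay no space cost; that reading is
an inference from these hunks, not something the diff states.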
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 7d0641ec13..4936685367 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+#include <iostream>
#include "parallel_move_resolver.h"
#include "nodes.h"
@@ -63,39 +64,42 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
}
}
+Location LowOf(Location location) {
+ if (location.IsRegisterPair()) {
+ return Location::RegisterLocation(location.low());
+ } else if (location.IsFpuRegisterPair()) {
+ return Location::FpuRegisterLocation(location.low());
+ } else if (location.IsDoubleStackSlot()) {
+ return Location::StackSlot(location.GetStackIndex());
+ } else {
+ return Location::NoLocation();
+ }
+}
+
+Location HighOf(Location location) {
+ if (location.IsRegisterPair()) {
+ return Location::RegisterLocation(location.high());
+ } else if (location.IsFpuRegisterPair()) {
+ return Location::FpuRegisterLocation(location.high());
+ } else if (location.IsDoubleStackSlot()) {
+ return Location::StackSlot(location.GetHighStackIndex(4));
+ } else {
+ return Location::NoLocation();
+ }
+}
+
// Update the source of `move`, knowing that `updated_location` has been swapped
// with `new_source`. Note that `updated_location` can be a pair, therefore if
// `move` is non-pair, we need to extract which register to use.
static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
Location source = move->GetSource();
- if (new_source.GetKind() == source.GetKind()) {
- DCHECK(updated_location.Equals(source));
- move->SetSource(new_source);
- } else if (new_source.IsStackSlot()
- || new_source.IsDoubleStackSlot()
- || source.IsStackSlot()
- || source.IsDoubleStackSlot()) {
- // Stack slots never take part of a pair/non-pair swap.
- DCHECK(updated_location.Equals(source));
+ if (LowOf(updated_location).Equals(source)) {
+ move->SetSource(LowOf(new_source));
+ } else if (HighOf(updated_location).Equals(source)) {
+ move->SetSource(HighOf(new_source));
+ } else {
+ DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
move->SetSource(new_source);
- } else if (source.IsRegister()) {
- DCHECK(new_source.IsRegisterPair()) << new_source;
- DCHECK(updated_location.IsRegisterPair()) << updated_location;
- if (updated_location.low() == source.reg()) {
- move->SetSource(Location::RegisterLocation(new_source.low()));
- } else {
- DCHECK_EQ(updated_location.high(), source.reg());
- move->SetSource(Location::RegisterLocation(new_source.high()));
- }
- } else if (source.IsFpuRegister()) {
- DCHECK(new_source.IsFpuRegisterPair()) << new_source;
- DCHECK(updated_location.IsFpuRegisterPair()) << updated_location;
- if (updated_location.low() == source.reg()) {
- move->SetSource(Location::FpuRegisterLocation(new_source.low()));
- } else {
- DCHECK_EQ(updated_location.high(), source.reg());
- move->SetSource(Location::FpuRegisterLocation(new_source.high()));
- }
}
}
@@ -265,6 +269,20 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked,
}
+int ParallelMoveResolver::AllocateScratchRegister(int blocked,
+ int register_count) {
+ int scratch = -1;
+ for (int reg = 0; reg < register_count; ++reg) {
+ if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
+ scratch = reg;
+ break;
+ }
+ }
+
+ return scratch;
+}
+
+
ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
@@ -278,6 +296,16 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
}
+ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
+ ParallelMoveResolver* resolver, int blocked, int number_of_registers)
+ : resolver_(resolver),
+ reg_(kNoRegister),
+ spilled_(false) {
+  // Don't spill here: grab a scratch register only if one is actually free.
+ reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers);
+}
+
+
ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
if (spilled_) {
resolver_->RestoreScratch(reg_);
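
The simplified UpdateSourceOf() above leans entirely on LowOf()/HighOf():
instead of enumerating register, FPU and stack cases, it asks whether the
pending source is the low or the high half of the location that was just
swapped. A standalone model of that logic (simplified, invented types; ART's
Location is much richer):

    #include <cassert>
    #include <cstdio>

    enum class K { None, Reg, RegPair, Stack, DStack };
    struct Loc {
      K k = K::None;
      int a = 0;
      int b = 0;  // a/b hold register numbers or a stack index
      bool operator==(const Loc& o) const { return k == o.k && a == o.a && b == o.b; }
    };
    Loc Reg(int r) { return {K::Reg, r, 0}; }
    Loc Pair(int lo, int hi) { return {K::RegPair, lo, hi}; }
    Loc Slot(int i) { return {K::Stack, i, 0}; }
    Loc DSlot(int i) { return {K::DStack, i, 0}; }

    // Mirrors LowOf()/HighOf(): a None result never equals a real source.
    Loc LowOf(Loc l) {
      if (l.k == K::RegPair) return Reg(l.a);
      if (l.k == K::DStack) return Slot(l.a);
      return {};
    }
    Loc HighOf(Loc l) {
      if (l.k == K::RegPair) return Reg(l.b);
      if (l.k == K::DStack) return Slot(l.a + 4);  // high word sits 4 bytes up
      return {};
    }

    // After `updated` was swapped with `repl`, retarget a pending source that
    // read `updated` or one of its halves (mirrors the new UpdateSourceOf()).
    Loc UpdateSource(Loc source, Loc updated, Loc repl) {
      if (LowOf(updated) == source) return LowOf(repl);
      if (HighOf(updated) == source) return HighOf(repl);
      assert(updated == source);
      return repl;
    }

    int main() {
      Loc s = UpdateSource(Reg(10), Pair(10, 11), DSlot(32));
      assert(s == Slot(32));  // register 10's value now lives in the slot's low word
      std::printf("ok\n");
      return 0;
    }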
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 3fa1b37afd..173cffc71e 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,10 +42,15 @@ class ParallelMoveResolver : public ValueObject {
protected:
class ScratchRegisterScope : public ValueObject {
public:
+  // Spill a scratch register if none are free.
ScratchRegisterScope(ParallelMoveResolver* resolver,
int blocked,
int if_scratch,
int number_of_registers);
+ // Grab a scratch register only if available.
+ ScratchRegisterScope(ParallelMoveResolver* resolver,
+ int blocked,
+ int number_of_registers);
~ScratchRegisterScope();
int GetRegister() const { return reg_; }
@@ -62,6 +67,8 @@ class ParallelMoveResolver : public ValueObject {
// Allocate a scratch register for performing a move. The method will try to use
// a register that is the destination of a move, but that move has not been emitted yet.
int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
+ // As above, but return -1 if no free register.
+ int AllocateScratchRegister(int blocked, int register_count);
// Emit a move.
virtual void EmitMove(size_t index) = 0;
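
The two-argument ScratchRegisterScope never spills, so a caller has to handle
coming away empty-handed. A usage sketch (the surrounding swap logic and the
`codegen_` accessor are assumptions, not part of this header):

    // Inside some ParallelMoveResolver subclass:
    ScratchRegisterScope temp(this, blocked, codegen_->GetNumberOfCoreRegisters());
    if (temp.GetRegister() == kNoRegister) {
      // No register was free: fall back to an exchange instruction or a
      // push/pop sequence instead of a register-mediated swap.
    } else {
      // Use temp.GetRegister() as the intermediate for the swap.
    }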
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 817a44b184..5c502f7ef4 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -31,8 +31,13 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
message_ << "C";
} else if (location.IsPair()) {
message_ << location.low() << "," << location.high();
- } else {
+ } else if (location.IsRegister()) {
message_ << location.reg();
+ } else if (location.IsStackSlot()) {
+ message_ << location.GetStackIndex() << "(sp)";
+ } else {
+    DCHECK(location.IsDoubleStackSlot()) << location;
+    message_ << "2x" << location.GetStackIndex() << "(sp)";
}
}
@@ -279,6 +284,26 @@ TEST(ParallelMoveTest, Pairs) {
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
}
+
+ {
+ // Test involving registers used in single context and pair context.
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(10),
+ Location::RegisterLocation(5),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(4, 5),
+ Location::DoubleStackSlot(32),
+ nullptr);
+ moves->AddMove(
+ Location::DoubleStackSlot(32),
+ Location::RegisterPairLocation(10, 11),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str());
+ }
}
} // namespace art
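
The expected string in the new test case is worth unpacking. The three moves
form a cycle through the double stack slot: 10 -> 5, (4,5) -> 2x32(sp) and
2x32(sp) -> (10,11). The resolver breaks the cycle with swaps: `(2x32(sp) <->
10,11)` satisfies the third move and, via UpdateSourceOf(), retargets the first
move's source from register 10 to the slot's low word; `(4,5 <-> 2x32(sp))`
satisfies the second move and retargets that source once more, from the slot's
low word to register 4; the remainder is then emitted as the plain move
`(4 -> 5)`.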
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 2d9a2bf330..f5d8d82571 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -60,11 +60,11 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
bool needs_materialization = false;
- if (!condition->GetUses().HasOnlyOneUse()) {
+ if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) {
needs_materialization = true;
} else {
HInstruction* user = condition->GetUses().GetFirst()->GetUser();
- if (!user->IsIf()) {
+ if (!user->IsIf() && !user->IsDeoptimize()) {
needs_materialization = true;
} else {
// TODO: if there is no intervening instructions with side-effect between this condition
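
The intent of these two tweaks: a condition may stay unmaterialized (emitted
as condition flags feeding a branch) only when its single user is the branch
consuming it, and HDeoptimize now qualifies alongside HIf. Conversely, a
condition that is also referenced from an environment must be materialized,
since a deoptimization may need to read its boolean value out of a register or
stack slot.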
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index cf38bd3f8c..4bca43499f 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1408,26 +1408,36 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
// Walk over all uses covered by this interval, and update the location
// information.
- while (use != nullptr && use->GetPosition() <= current->GetEnd()) {
- LocationSummary* locations = use->GetUser()->GetLocations();
- if (use->GetIsEnvironment()) {
- locations->SetEnvironmentAt(use->GetInputIndex(), source);
- } else {
- Location expected_location = locations->InAt(use->GetInputIndex());
- // The expected (actual) location may be invalid in case the input is unused. Currently
- // this only happens for intrinsics.
- if (expected_location.IsValid()) {
- if (expected_location.IsUnallocated()) {
- locations->SetInAt(use->GetInputIndex(), source);
- } else if (!expected_location.IsConstant()) {
- AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
- }
+
+ LiveRange* range = current->GetFirstRange();
+ while (range != nullptr) {
+ while (use != nullptr && use->GetPosition() < range->GetStart()) {
+ DCHECK(use->GetIsEnvironment());
+ use = use->GetNext();
+ }
+ while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+ DCHECK(current->Covers(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
+ LocationSummary* locations = use->GetUser()->GetLocations();
+ if (use->GetIsEnvironment()) {
+ locations->SetEnvironmentAt(use->GetInputIndex(), source);
} else {
- DCHECK(use->GetUser()->IsInvoke());
- DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+ Location expected_location = locations->InAt(use->GetInputIndex());
+ // The expected (actual) location may be invalid in case the input is unused. Currently
+ // this only happens for intrinsics.
+ if (expected_location.IsValid()) {
+ if (expected_location.IsUnallocated()) {
+ locations->SetInAt(use->GetInputIndex(), source);
+ } else if (!expected_location.IsConstant()) {
+ AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+ }
+ } else {
+ DCHECK(use->GetUser()->IsInvoke());
+ DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+ }
}
+ use = use->GetNext();
}
- use = use->GetNext();
+ range = range->GetNext();
}
// If the next interval starts just after this one, and has a register,
@@ -1503,7 +1513,15 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
}
current = next_sibling;
} while (current != nullptr);
- DCHECK(use == nullptr);
+
+ if (kIsDebugBuild) {
+    // Any remaining uses can only be environment uses. The locations for
+    // these environment uses will be none.
+ while (use != nullptr) {
+ DCHECK(use->GetIsEnvironment());
+ use = use->GetNext();
+ }
+ }
}
void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
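
This rewrite pairs with the liveness change further down: environment uses no
longer necessarily extend an interval's live ranges, so a use position can now
fall in a lifetime hole between two ranges. Walking range by range lets the
code skip such uses (the DCHECK asserts they can only be environment uses)
while still updating locations for every use a range covers, and the relaxed
check at the end tolerates trailing environment uses past the last sibling.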
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 7c3a0357d6..3951439881 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
@@ -42,7 +43,9 @@ static bool Check(const uint16_t* data) {
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
graph->TryBuildingSsa();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -58,7 +61,9 @@ TEST(RegisterAllocatorTest, ValidateIntervals) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = new (&allocator) HGraph(&allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
GrowableArray<LiveInterval*> intervals(&allocator, 0);
// Test with two intervals of the same range.
@@ -298,7 +303,9 @@ TEST(RegisterAllocatorTest, Loop3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -330,7 +337,9 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -383,7 +392,9 @@ TEST(RegisterAllocatorTest, DeadPhi) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
SsaDeadPhiElimination(graph).Run();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -405,7 +416,9 @@ TEST(RegisterAllocatorTest, FreeUntil) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
SsaDeadPhiElimination(graph).Run();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -507,7 +520,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -522,7 +537,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -539,7 +556,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -556,7 +575,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -608,7 +629,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
{
HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -621,7 +644,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
{
HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -671,7 +696,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
{
HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -685,7 +712,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
{
HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -734,7 +763,9 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
{
HGraph* graph = BuildDiv(&allocator, &div);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -822,7 +853,9 @@ TEST(RegisterAllocatorTest, SpillInactive) {
locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall);
locations->SetOut(Location::RequiresRegister());
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
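
Every test in this file now repeats the same three-line codegen setup. A
hypothetical fixture (invented here, not part of the patch) would shrink the
repetition while keeping the features object alive alongside the codegen, in
case CodeGeneratorX86 retains the reference; the temporary CompilerOptions()
simply mirrors what the tests above already do:

    struct X86CodegenHolder {
      std::unique_ptr<const X86InstructionSetFeatures> features;
      x86::CodeGeneratorX86 codegen;
      explicit X86CodegenHolder(HGraph* graph)
          : features(X86InstructionSetFeatures::FromCppDefines()),
            codegen(graph, *features, CompilerOptions()) {}
    };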
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index fcc4e69b37..e154ea4ee6 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -487,7 +487,7 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) {
HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
GetGraph()->GetArena(), current_locals_->Size());
environment->CopyFrom(current_locals_);
- instruction->SetEnvironment(environment);
+ instruction->SetRawEnvironment(environment);
}
void SsaBuilder::VisitTemporary(HTemporary* temp) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 0f3973e5fb..95da6ef551 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -218,28 +218,34 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
current->GetLiveInterval()->SetFrom(current->GetLifetimePosition());
}
- // All inputs of an instruction must be live.
- for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
- HInstruction* input = current->InputAt(i);
- // Some instructions 'inline' their inputs, that is they do not need
- // to be materialized.
- if (input->HasSsaIndex()) {
- live_in->SetBit(input->GetSsaIndex());
- input->GetLiveInterval()->AddUse(current, i, false);
- }
- }
-
+  // Process the environment first, because environment uses come at or
+  // after the liveness position of the instruction's inputs.
if (current->HasEnvironment()) {
// Handle environment uses. See statements (b) and (c) of the
// SsaLivenessAnalysis.
HEnvironment* environment = current->GetEnvironment();
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* instruction = environment->GetInstructionAt(i);
- if (ShouldBeLiveForEnvironment(instruction)) {
+ bool should_be_live = ShouldBeLiveForEnvironment(instruction);
+ if (should_be_live) {
DCHECK(instruction->HasSsaIndex());
live_in->SetBit(instruction->GetSsaIndex());
- instruction->GetLiveInterval()->AddUse(current, i, true);
}
+ if (instruction != nullptr) {
+ instruction->GetLiveInterval()->AddUse(
+ current, i, /* is_environment */ true, should_be_live);
+ }
+ }
+ }
+
+ // All inputs of an instruction must be live.
+ for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
+ HInstruction* input = current->InputAt(i);
+    // Some instructions 'inline' their inputs, that is, they do not need
+ // to be materialized.
+ if (input->HasSsaIndex()) {
+ live_in->SetBit(input->GetSsaIndex());
+ input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false);
}
}
}
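
The reordering matters because AddUse() (next file) builds the use list by
prepending, so entries must arrive in non-increasing position order for the
list to stay sorted. Environment uses sit at or after the liveness position of
the same instruction's input uses, hence the environment is processed first;
the new DCHECK(!is_environment) in AddUse()'s out-of-order path relies on that
ordering.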
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index bc78dc2e76..d2da84c0c0 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -189,7 +189,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
AddRange(position, position + 1);
}
- void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
+ void AddUse(HInstruction* instruction,
+ size_t input_index,
+ bool is_environment,
+ bool keep_alive = false) {
// Set the use within the instruction.
size_t position = instruction->GetLifetimePosition() + 1;
LocationSummary* locations = instruction->GetLocations();
@@ -211,6 +214,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
&& (first_use_->GetPosition() < position)) {
// The user uses the instruction multiple times, and one use dies before the other.
// We update the use list so that the latter is first.
+ DCHECK(!is_environment);
UsePosition* cursor = first_use_;
while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) {
cursor = cursor->GetNext();
@@ -225,6 +229,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
return;
}
+ first_use_ = new (allocator_) UsePosition(
+ instruction, input_index, is_environment, position, first_use_);
+
+ if (is_environment && !keep_alive) {
+ // If this environment use does not keep the instruction live, it does not
+ // affect the live range of that instruction.
+ return;
+ }
+
size_t start_block_position = instruction->GetBlock()->GetLifetimeStart();
if (first_range_ == nullptr) {
// First time we see a use of that interval.
@@ -246,8 +259,6 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
      // and the check on line 205 would succeed.
first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_);
}
- first_use_ = new (allocator_) UsePosition(
- instruction, input_index, is_environment, position, first_use_);
}
void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
@@ -425,9 +436,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
UsePosition* use = first_use_;
size_t end = GetEnd();
while (use != nullptr && use->GetPosition() <= end) {
- size_t use_position = use->GetPosition();
- if (use_position > position) {
- return use_position;
+ if (!use->GetIsEnvironment()) {
+ size_t use_position = use->GetPosition();
+ if (use_position > position) {
+ return use_position;
+ }
}
use = use->GetNext();
}
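
A concrete reading of the keep-alive rule: take an instruction defined at
lifetime position 10, with a real use at 20 and an environment-only use at 30
recorded with keep_alive == false. All three facts land in the use list, but
the early return keeps the live range ending at 20, and the updated
FirstUseAfter() skips the environment entry, so the allocator sees no
register-demanding use at 30. Only keep-alive environment uses (presumably
values a deoptimization point must still be able to observe) stretch the range
as before.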
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 5818a37a46..a73c8d77f3 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -27,6 +27,32 @@
namespace art {
+// Helper to build art::StackMapStream::LocationCatalogEntriesIndices.
+class LocationCatalogEntriesIndicesEmptyFn {
+ public:
+ void MakeEmpty(std::pair<DexRegisterLocation, size_t>& item) const {
+ item.first = DexRegisterLocation::None();
+ }
+ bool IsEmpty(const std::pair<DexRegisterLocation, size_t>& item) const {
+ return item.first == DexRegisterLocation::None();
+ }
+};
+
+// Hash function for art::StackMapStream::LocationCatalogEntriesIndices.
+// It is designed not to create collisions: a location's two fields are
+// packed into a single 64-bit key before hashing.
+class DexRegisterLocationHashFn {
+ public:
+ size_t operator()(DexRegisterLocation key) const {
+    // Concatenate `key`'s fields to create a 64-bit value to be hashed.
+ int64_t kind_and_value =
+ (static_cast<int64_t>(key.kind_) << 32) | static_cast<int64_t>(key.value_);
+ return inner_hash_fn_(kind_and_value);
+ }
+ private:
+ std::hash<int64_t> inner_hash_fn_;
+};
+
+
/**
* Collects and builds stack maps for a method. All the stack maps
* for a method are placed in a CodeInfo object.
@@ -36,11 +62,13 @@ class StackMapStream : public ValueObject {
explicit StackMapStream(ArenaAllocator* allocator)
: allocator_(allocator),
stack_maps_(allocator, 10),
+ location_catalog_entries_(allocator, 4),
dex_register_locations_(allocator, 10 * 4),
inline_infos_(allocator, 2),
stack_mask_max_(-1),
dex_pc_max_(0),
native_pc_offset_max_(0),
+ register_mask_max_(0),
number_of_stack_maps_with_inline_info_(0),
dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {}
@@ -101,6 +129,7 @@ class StackMapStream : public ValueObject {
dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset);
+ register_mask_max_ = std::max(register_mask_max_, register_mask);
}
void AddInlineInfoEntry(uint32_t method_index) {
@@ -111,6 +140,7 @@ class StackMapStream : public ValueObject {
size_t ComputeNeededSize() {
size_t size = CodeInfo::kFixedSize
+ + ComputeDexRegisterLocationCatalogSize()
+ ComputeStackMapsSize()
+ ComputeDexRegisterMapsSize()
+ ComputeInlineInfoSize();
@@ -128,24 +158,43 @@ class StackMapStream : public ValueObject {
ComputeInlineInfoSize(),
ComputeDexRegisterMapsSize(),
dex_pc_max_,
- native_pc_offset_max_);
+ native_pc_offset_max_,
+ register_mask_max_);
+ }
+
+  // Compute the size of the Dex register location catalog (one per method).
+ size_t ComputeDexRegisterLocationCatalogSize() const {
+ size_t size = DexRegisterLocationCatalog::kFixedSize;
+ for (size_t location_catalog_entry_index = 0;
+ location_catalog_entry_index < location_catalog_entries_.Size();
+ ++location_catalog_entry_index) {
+ DexRegisterLocation dex_register_location =
+ location_catalog_entries_.Get(location_catalog_entry_index);
+ size += DexRegisterLocationCatalog::EntrySize(dex_register_location);
+ }
+ return size;
}
- // Compute the size of the Dex register map of `entry`.
size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const {
+ // Size of the map in bytes.
size_t size = DexRegisterMap::kFixedSize;
- // Add the bit mask for the dex register liveness.
- size += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers);
- for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
+ // Add the live bit mask for the Dex register liveness.
+ size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers);
+ // Compute the size of the set of live Dex register entries.
+ size_t number_of_live_dex_registers = 0;
+ for (size_t dex_register_number = 0;
dex_register_number < entry.num_dex_registers;
++dex_register_number) {
if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
- DexRegisterLocation dex_register_location = dex_register_locations_.Get(
- entry.dex_register_locations_start_index + index_in_dex_register_locations);
- size += DexRegisterMap::EntrySize(dex_register_location);
- index_in_dex_register_locations++;
+ ++number_of_live_dex_registers;
}
}
+ size_t map_entries_size_in_bits =
+ DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size())
+ * number_of_live_dex_registers;
+ size_t map_entries_size_in_bytes =
+ RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte;
+ size += map_entries_size_in_bytes;
return size;
}
@@ -168,8 +217,16 @@ class StackMapStream : public ValueObject {
+ (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
}
+ size_t ComputeDexRegisterLocationCatalogStart() const {
+ return CodeInfo::kFixedSize;
+ }
+
+ size_t ComputeStackMapsStart() const {
+ return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize();
+ }
+
size_t ComputeDexRegisterMapsStart() {
- return CodeInfo::kFixedSize + ComputeStackMapsSize();
+ return ComputeStackMapsStart() + ComputeStackMapsSize();
}
size_t ComputeInlineInfoStart() {
@@ -194,11 +251,32 @@ class StackMapStream : public ValueObject {
ComputeInlineInfoStart(),
inline_info_size);
- code_info.SetEncoding(
- inline_info_size, dex_register_map_size, dex_pc_max_, native_pc_offset_max_);
+ code_info.SetEncoding(inline_info_size,
+ dex_register_map_size,
+ dex_pc_max_,
+ native_pc_offset_max_,
+ register_mask_max_);
code_info.SetNumberOfStackMaps(stack_maps_.Size());
code_info.SetStackMaskSize(stack_mask_size);
- DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize());
+ DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize());
+
+ // Set the Dex register location catalog.
+ code_info.SetNumberOfDexRegisterLocationCatalogEntries(
+ location_catalog_entries_.Size());
+ MemoryRegion dex_register_location_catalog_region = region.Subregion(
+ ComputeDexRegisterLocationCatalogStart(),
+ ComputeDexRegisterLocationCatalogSize());
+ DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
+ // Offset in `dex_register_location_catalog` where to store the next
+ // register location.
+ size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize;
+ for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) {
+ DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i);
+ dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location);
+ location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location);
+ }
+    // Ensure we reached the end of the Dex register location catalog.
+ DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
uintptr_t next_dex_register_map_offset = 0;
uintptr_t next_inline_info_offset = 0;
@@ -234,25 +312,25 @@ class StackMapStream : public ValueObject {
stack_map.SetDexRegisterMapOffset(
code_info, register_region.start() - dex_register_locations_region.start());
- // Offset in `dex_register_map` where to store the next register entry.
- size_t offset = DexRegisterMap::kFixedSize;
- dex_register_map.SetLiveBitMask(offset,
- entry.num_dex_registers,
- *entry.live_dex_registers_mask);
- offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers);
+ // Set the live bit mask.
+ dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask);
+
+ // Set the dex register location mapping data.
for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
dex_register_number < entry.num_dex_registers;
++dex_register_number) {
if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
- DexRegisterLocation dex_register_location = dex_register_locations_.Get(
- entry.dex_register_locations_start_index + index_in_dex_register_locations);
- dex_register_map.SetRegisterInfo(offset, dex_register_location);
- offset += DexRegisterMap::EntrySize(dex_register_location);
+ size_t location_catalog_entry_index =
+ dex_register_locations_.Get(entry.dex_register_locations_start_index
+ + index_in_dex_register_locations);
+ dex_register_map.SetLocationCatalogEntryIndex(
+ index_in_dex_register_locations,
+ location_catalog_entry_index,
+ entry.num_dex_registers,
+ location_catalog_entries_.Size());
++index_in_dex_register_locations;
}
}
- // Ensure we reached the end of the Dex registers region.
- DCHECK_EQ(offset, register_region.size());
}
}
@@ -282,12 +360,31 @@ class StackMapStream : public ValueObject {
}
void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) {
+ StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1);
+ DCHECK_LT(dex_register, entry.num_dex_registers);
+
if (kind != DexRegisterLocation::Kind::kNone) {
// Ensure we only use non-compressed location kind at this stage.
DCHECK(DexRegisterLocation::IsShortLocationKind(kind))
<< DexRegisterLocation::PrettyDescriptor(kind);
- dex_register_locations_.Add(DexRegisterLocation(kind, value));
- StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1);
+ DexRegisterLocation location(kind, value);
+
+ // Look for Dex register `location` in the location catalog (using the
+ // companion hash map of locations to indices). Use its index if it
+ // is already in the location catalog. If not, insert it (in the
+ // location catalog and the hash map) and use the newly created index.
+ auto it = location_catalog_entries_indices_.Find(location);
+ if (it != location_catalog_entries_indices_.end()) {
+ // Retrieve the index from the hash map.
+ dex_register_locations_.Add(it->second);
+ } else {
+ // Create a new entry in the location catalog and the hash map.
+ size_t index = location_catalog_entries_.Size();
+ location_catalog_entries_.Add(location);
+ dex_register_locations_.Add(index);
+ location_catalog_entries_indices_.Insert(std::make_pair(location, index));
+ }
+
entry.live_dex_registers_mask->SetBit(dex_register);
entry.dex_register_map_hash += (1 << dex_register);
entry.dex_register_map_hash += static_cast<uint32_t>(value);
@@ -354,9 +451,9 @@ class StackMapStream : public ValueObject {
return false;
}
if (a.live_dex_registers_mask->IsBitSet(i)) {
- DexRegisterLocation a_loc = dex_register_locations_.Get(
+ size_t a_loc = dex_register_locations_.Get(
a.dex_register_locations_start_index + index_in_dex_register_locations);
- DexRegisterLocation b_loc = dex_register_locations_.Get(
+ size_t b_loc = dex_register_locations_.Get(
b.dex_register_locations_start_index + index_in_dex_register_locations);
if (a_loc != b_loc) {
return false;
@@ -369,21 +466,29 @@ class StackMapStream : public ValueObject {
ArenaAllocator* allocator_;
GrowableArray<StackMapEntry> stack_maps_;
- GrowableArray<DexRegisterLocation> dex_register_locations_;
+
+ // A catalog of unique [location_kind, register_value] pairs (per method).
+ GrowableArray<DexRegisterLocation> location_catalog_entries_;
+ // Map from Dex register location catalog entries to their indices in the
+ // location catalog.
+ typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn,
+ DexRegisterLocationHashFn> LocationCatalogEntriesIndices;
+ LocationCatalogEntriesIndices location_catalog_entries_indices_;
+
+  // Concatenated per-stack-map sequences of indices into
+  // `location_catalog_entries_`, one index per live Dex register.
+ GrowableArray<size_t> dex_register_locations_;
GrowableArray<InlineInfoEntry> inline_infos_;
int stack_mask_max_;
uint32_t dex_pc_max_;
uint32_t native_pc_offset_max_;
+ uint32_t register_mask_max_;
size_t number_of_stack_maps_with_inline_info_;
ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_;
static constexpr uint32_t kNoSameDexMapFound = -1;
- ART_FRIEND_TEST(StackMapTest, Test1);
- ART_FRIEND_TEST(StackMapTest, Test2);
- ART_FRIEND_TEST(StackMapTest, TestNonLiveDexRegisters);
-
DISALLOW_COPY_AND_ASSIGN(StackMapStream);
};
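
The catalog change replaces per-stack-map location records with per-method
interning: each distinct (kind, value) pair is stored once, and the maps carry
only small indices into the catalog. A standalone sketch of the interning step
(simplified types, std::unordered_map instead of ART's HashMap; this version
also zero-extends the value so a negative constant cannot smear sign bits over
the kind bits):

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <unordered_map>
    #include <vector>

    struct Loc {
      int32_t kind;
      int32_t value;
      bool operator==(const Loc& o) const { return kind == o.kind && value == o.value; }
    };

    struct LocHash {
      size_t operator()(const Loc& l) const {
        // Pack both fields into one 64-bit key; zero-extend the value so it
        // cannot overwrite the kind bits.
        uint64_t key = (static_cast<uint64_t>(static_cast<uint32_t>(l.kind)) << 32) |
                       static_cast<uint32_t>(l.value);
        return std::hash<uint64_t>()(key);
      }
    };

    // Return the catalog index for `loc`, inserting it on first sight.
    size_t Intern(std::vector<Loc>& catalog,
                  std::unordered_map<Loc, size_t, LocHash>& indices,
                  Loc loc) {
      auto it = indices.find(loc);
      if (it != indices.end()) return it->second;  // already catalogued: reuse
      size_t index = catalog.size();
      catalog.push_back(loc);
      indices.emplace(loc, index);
      return index;
    }

A Dex register map then stores one such index per live register, packed into
ceil(log2(catalog size)) bits each, which is exactly where the 1-bit and 2-bit
entry sizes asserted in stack_map_test.cc below come from.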
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index e5a9790254..8d160bc81e 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -31,6 +31,8 @@ static bool SameBits(MemoryRegion region, const BitVector& bit_vector) {
return true;
}
+using Kind = DexRegisterLocation::Kind;
+
TEST(StackMapTest, Test1) {
ArenaPool pool;
ArenaAllocator arena(&pool);
@@ -39,8 +41,8 @@ TEST(StackMapTest, Test1) {
ArenaBitVector sp_mask(&arena, 0, false);
size_t number_of_dex_registers = 2;
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location.
+  stream.AddDexRegisterEntry(1, Kind::kConstant, -2);  // Large location.
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -51,6 +53,16 @@ TEST(StackMapTest, Test1) {
ASSERT_EQ(0u, code_info.GetStackMaskSize());
ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(2u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ // The Dex register location catalog contains:
+ // - one 1-byte short Dex register location, and
+ // - one 5-byte large Dex register location.
+ size_t expected_location_catalog_size = 1u + 5u;
+ ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
+
StackMap stack_map = code_info.GetStackMapAt(0);
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
@@ -62,14 +74,40 @@ TEST(StackMapTest, Test1) {
ASSERT_TRUE(SameBits(stack_mask, sp_mask));
ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(7u, dex_registers.Size());
- DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
- DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind());
+ DexRegisterMap dex_register_map =
+ code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask, and
+  // - one 1-byte set of location catalog entry indices composed of two 1-bit values.
+ size_t expected_dex_register_map_size = 1u + 1u;
+ ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
+
+ ASSERT_EQ(Kind::kInStack,
+ dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstant,
+ dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInStack,
+ dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstantLargeValue,
+ dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
+ 0, number_of_dex_registers, number_of_location_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
+ 1, number_of_dex_registers, number_of_location_catalog_entries);
+ ASSERT_EQ(0u, index0);
+ ASSERT_EQ(1u, index1);
+ DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
+ DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ ASSERT_EQ(Kind::kInStack, location0.GetKind());
+ ASSERT_EQ(Kind::kConstant, location1.GetKind());
+ ASSERT_EQ(Kind::kInStack, location0.GetInternalKind());
+ ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
ASSERT_EQ(0, location0.GetValue());
ASSERT_EQ(-2, location1.GetValue());
@@ -86,8 +124,8 @@ TEST(StackMapTest, Test2) {
sp_mask1.SetBit(4);
size_t number_of_dex_registers = 2;
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
stream.AddInlineInfoEntry(42);
stream.AddInlineInfoEntry(82);
@@ -95,8 +133,8 @@ TEST(StackMapTest, Test2) {
sp_mask2.SetBit(3);
sp_mask1.SetBit(8);
stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 18);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kInFpuRegister, 3);
+ stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location.
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -107,6 +145,16 @@ TEST(StackMapTest, Test2) {
ASSERT_EQ(1u, code_info.GetStackMaskSize());
ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(4u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ // The Dex register location catalog contains:
+ // - three 1-byte short Dex register locations, and
+ // - one 5-byte large Dex register location.
+ size_t expected_location_catalog_size = 3u * 1u + 5u;
+ ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
+
// First stack map.
{
StackMap stack_map = code_info.GetStackMapAt(0);
@@ -120,17 +168,40 @@ TEST(StackMapTest, Test2) {
ASSERT_TRUE(SameBits(stack_mask, sp_mask1));
ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- DexRegisterMap dex_registers =
+ DexRegisterMap dex_register_map =
code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(7u, dex_registers.Size());
- DexRegisterLocation location0 =
- dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
- DexRegisterLocation location1 =
- dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind());
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask, and
+ // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
+ size_t expected_dex_register_map_size = 1u + 1u;
+ ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
+
+ ASSERT_EQ(Kind::kInStack,
+ dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstant,
+ dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInStack,
+ dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstantLargeValue,
+ dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
+ 0, number_of_dex_registers, number_of_location_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
+ 1, number_of_dex_registers, number_of_location_catalog_entries);
+ ASSERT_EQ(0u, index0);
+ ASSERT_EQ(1u, index1);
+ DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
+ DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ ASSERT_EQ(Kind::kInStack, location0.GetKind());
+ ASSERT_EQ(Kind::kConstant, location1.GetKind());
+ ASSERT_EQ(Kind::kInStack, location0.GetInternalKind());
+ ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
ASSERT_EQ(0, location0.GetValue());
ASSERT_EQ(-2, location1.GetValue());
@@ -154,17 +225,40 @@ TEST(StackMapTest, Test2) {
ASSERT_TRUE(SameBits(stack_mask, sp_mask2));
ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- DexRegisterMap dex_registers =
+ DexRegisterMap dex_register_map =
code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(3u, dex_registers.Size());
- DexRegisterLocation location0 =
- dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
- DexRegisterLocation location1 =
- dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
- ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetInternalKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetInternalKind());
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask, and
+ // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
+ size_t expected_dex_register_map_size = 1u + 1u;
+ ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
+
+ ASSERT_EQ(Kind::kInRegister,
+ dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInFpuRegister,
+ dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInRegister,
+ dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInFpuRegister,
+ dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(18, dex_register_map.GetMachineRegister(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(3, dex_register_map.GetMachineRegister(1, number_of_dex_registers, code_info));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
+ 0, number_of_dex_registers, number_of_location_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
+ 1, number_of_dex_registers, number_of_location_catalog_entries);
+ ASSERT_EQ(2u, index0);
+ ASSERT_EQ(3u, index1);
+ DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
+ DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ ASSERT_EQ(Kind::kInRegister, location0.GetKind());
+ ASSERT_EQ(Kind::kInFpuRegister, location1.GetKind());
+ ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind());
+ ASSERT_EQ(Kind::kInFpuRegister, location1.GetInternalKind());
ASSERT_EQ(18, location0.GetValue());
ASSERT_EQ(3, location1.GetValue());
@@ -180,8 +274,8 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
ArenaBitVector sp_mask(&arena, 0, false);
uint32_t number_of_dex_registers = 2;
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kNone, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -189,14 +283,62 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
stream.FillIn(region);
CodeInfo code_info(region);
+ ASSERT_EQ(0u, code_info.GetStackMaskSize());
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(1u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ // The Dex register location catalog contains:
+ // - one 5-byte large Dex register location.
+ size_t expected_location_catalog_size = 5u;
+ ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
+
StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
+ ASSERT_EQ(0u, stack_map.GetDexPc(code_info));
+ ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info));
+ ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info));
+
ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2);
- ASSERT_EQ(DexRegisterLocation::Kind::kNone,
- dex_registers.GetLocationKind(0, number_of_dex_registers));
- ASSERT_EQ(DexRegisterLocation::Kind::kConstant,
- dex_registers.GetLocationKind(1, number_of_dex_registers));
- ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers));
+ DexRegisterMap dex_register_map =
+ code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+ ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0));
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
+ ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask.
+ // No space is allocated for the sole location catalog entry index, as it is useless.
+ size_t expected_dex_register_map_size = 1u + 0u;
+ ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
+
+ ASSERT_EQ(Kind::kNone,
+ dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstant,
+ dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kNone,
+ dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstantLargeValue,
+ dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
+ 0, number_of_dex_registers, number_of_location_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
+ 1, number_of_dex_registers, number_of_location_catalog_entries);
+ ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0);
+ ASSERT_EQ(0u, index1);
+ DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
+ DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ ASSERT_EQ(Kind::kNone, location0.GetKind());
+ ASSERT_EQ(Kind::kConstant, location1.GetKind());
+ ASSERT_EQ(Kind::kNone, location0.GetInternalKind());
+ ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
+ ASSERT_EQ(0, location0.GetValue());
+ ASSERT_EQ(-2, location1.GetValue());
+
ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
}
@@ -209,14 +351,21 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
StackMapStream stream(&arena);
ArenaBitVector sp_mask(&arena, 0, false);
- uint32_t number_of_dex_registers = 0xEA;
+ uint32_t number_of_dex_registers = 1024;
+ // Create the first stack map (and its Dex register map).
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- for (uint32_t i = 0; i < number_of_dex_registers - 9; ++i) {
- stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+ uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8;
+ for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) {
+    // Use two different Dex register locations to populate this map,
+    // as using a single value (in the whole CodeInfo object) would
+    // leave only one entry in the location catalog and thus make the
+    // Dex register mapping data empty (see
+    // art::DexRegisterMap::SingleEntrySizeInBits).
+ stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2); // Short location.
}
+ // Create the second stack map (and its Dex register map).
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
for (uint32_t i = 0; i < number_of_dex_registers; ++i) {
- stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+ stream.AddDexRegisterEntry(i, Kind::kConstant, 0); // Short location.
}
size_t size = stream.ComputeNeededSize();
@@ -225,10 +374,35 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
stream.FillIn(region);
CodeInfo code_info(region);
- StackMap stack_map = code_info.GetStackMapAt(1);
- ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- ASSERT_NE(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap);
- ASSERT_EQ(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMapSmallEncoding);
+  // The location catalog contains two entries (DexRegisterLocation(kConstant, 0)
+  // and DexRegisterLocation(kConstant, 1)), so each location catalog entry
+  // index is encoded on 1 bit.
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(2u, number_of_location_catalog_entries);
+ ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries));
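+  // Worked arithmetic: two catalog entries need ceil(log2(2)) = 1 bit per
+  // entry index, which is what SingleEntrySizeInBits(2) returns above.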
+
+  // The first Dex register map contains:
+  // - a live register bit mask for 1024 registers (that is, 128 bytes of
+  //   data); and
+  // - mapping information for the 1016 live Dex registers, at 1 bit per
+  //   register (that is, 127 bytes of data).
+  // Hence it has a size of 255 bytes, and therefore...
+ ASSERT_EQ(128u, DexRegisterMap::GetLiveBitMaskSize(number_of_dex_registers));
+ StackMap stack_map0 = code_info.GetStackMapAt(0);
+ DexRegisterMap dex_register_map0 =
+ code_info.GetDexRegisterMapOf(stack_map0, number_of_dex_registers);
+ ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers,
+ number_of_location_catalog_entries));
+ ASSERT_EQ(255u, dex_register_map0.Size());
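+  // Worked arithmetic for the 255 bytes: 1024 registers / 8 = 128 bytes of
+  // live bit mask; 1016 live registers * 1 bit = 1016 bits = 127 bytes of
+  // mapping data; 128 + 127 = 255 = 0xFF.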
+
+ StackMap stack_map1 = code_info.GetStackMapAt(1);
+ ASSERT_TRUE(stack_map1.HasDexRegisterMap(code_info));
+ // ...the offset of the second Dex register map (relative to the
+ // beginning of the Dex register maps region) is 255 (i.e.,
+ // kNoDexRegisterMapSmallEncoding).
+ ASSERT_NE(stack_map1.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap);
+ ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(code_info), 0xFFu);
}
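+
+// The collision the test above exercises, in short: if Dex register map
+// offsets were stored directly in a one-byte field, a genuine offset of 255
+// (0xFF) would be indistinguishable from the
+// StackMap::kNoDexRegisterMapSmallEncoding marker, as in this hypothetical
+// decoder (for illustration only, not the actual StackMap code):
+//
+//   bool HasMap(uint8_t small_offset) {
+//     // An offset of 0xFF would wrongly read back as "no map".
+//     return small_offset != StackMap::kNoDexRegisterMapSmallEncoding;
+//   }
+//
+// The assertions above check that such a map is still reported as present,
+// presumably because the stream switches to a wider offset encoding once
+// this value is reached.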
TEST(StackMapTest, TestShareDexRegisterMap) {
@@ -240,16 +414,16 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
uint32_t number_of_dex_registers = 2;
// First stack map.
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
// Second stack map, which should share the same dex register map.
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
// Third stack map (doesn't share the dex register map).
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 2);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInRegister, 2); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -260,20 +434,20 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
// Verify first stack map.
StackMap sm0 = ci.GetStackMapAt(0);
DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers);
- ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers));
- ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers));
+ ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci));
+ ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci));
// Verify second stack map.
StackMap sm1 = ci.GetStackMapAt(1);
DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers);
- ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers));
- ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers));
+ ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci));
+ ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci));
// Verify third stack map.
StackMap sm2 = ci.GetStackMapAt(2);
DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers);
- ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers));
- ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers));
+ ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci));
+ ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci));
// Verify dex register map offsets.
ASSERT_EQ(sm0.GetDexRegisterMapOffset(ci), sm1.GetDexRegisterMapOffset(ci));
@@ -281,4 +455,39 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
ASSERT_NE(sm1.GetDexRegisterMapOffset(ci), sm2.GetDexRegisterMapOffset(ci));
}
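+
+// The offset equality checked above suggests that StackMapStream
+// deduplicates identical Dex register maps: each new map's entries are
+// presumably compared against previously emitted ones, and the earlier
+// offset is reused on an exact match. A minimal sketch of that idea, with
+// hypothetical names (not the actual StackMapStream code):
+//
+//   size_t FindOrEmitDexRegisterMap(const std::vector<DexRegisterEntry>& entries) {
+//     auto it = emitted_maps_.find(entries);        // entries -> offset
+//     if (it != emitted_maps_.end()) {
+//       return it->second;                          // share the existing map
+//     }
+//     size_t offset = EmitDexRegisterMap(entries);  // write a fresh map
+//     emitted_maps_.emplace(entries, offset);
+//     return offset;
+//   }
+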
+TEST(StackMapTest, TestNoDexRegisterMap) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ StackMapStream stream(&arena);
+
+ ArenaBitVector sp_mask(&arena, 0, false);
+ uint32_t number_of_dex_registers = 0;
+ stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+
+ size_t size = stream.ComputeNeededSize();
+ void* memory = arena.Alloc(size, kArenaAllocMisc);
+ MemoryRegion region(memory, size);
+ stream.FillIn(region);
+
+ CodeInfo code_info(region);
+ ASSERT_EQ(0u, code_info.GetStackMaskSize());
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(0u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ ASSERT_EQ(0u, location_catalog.Size());
+
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
+ ASSERT_EQ(0u, stack_map.GetDexPc(code_info));
+ ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info));
+ ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info));
+
+ ASSERT_FALSE(stack_map.HasDexRegisterMap(code_info));
+ ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
+}
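+
+// With zero Dex registers no live bit mask, mapping data, or catalog entries
+// are emitted; the stack map presumably stores the StackMap::kNoDexRegisterMap
+// marker in its offset field, which is why HasDexRegisterMap() is false above.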
+
} // namespace art