| -rw-r--r-- | build/Android.gtest.mk | 1 |
| -rw-r--r-- | compiler/dex/bb_optimizations.h | 10 |
| -rw-r--r-- | compiler/dex/dataflow_iterator-inl.h | 50 |
| -rw-r--r-- | compiler/dex/dataflow_iterator.h | 46 |
| -rw-r--r-- | compiler/dex/global_value_numbering.cc | 88 |
| -rw-r--r-- | compiler/dex/global_value_numbering.h | 18 |
| -rw-r--r-- | compiler/dex/global_value_numbering_test.cc | 45 |
| -rw-r--r-- | compiler/dex/local_value_numbering.cc | 26 |
| -rw-r--r-- | compiler/dex/mir_graph.cc | 266 |
| -rw-r--r-- | compiler/dex/mir_graph.h | 25 |
| -rw-r--r-- | compiler/dex/mir_graph_test.cc | 381 |
| -rw-r--r-- | compiler/dex/mir_optimization.cc | 10 |
| -rw-r--r-- | compiler/dex/mir_optimization_test.cc | 2 |
| -rw-r--r-- | compiler/dex/pass_driver_me.h | 3 |
| -rw-r--r-- | compiler/dex/pass_me.h | 1 |
| -rw-r--r-- | compiler/dex/vreg_analysis.cc | 6 |
16 files changed, 816 insertions, 162 deletions
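The diff below rewrites MIRGraph::ComputeTopologicalSortOrder() so that the sort also records loop structure (per-position loop ends, a block-id-to-position index map, and a loop-head stack) and adds a LoopRepeatingTopologicalSortIterator that replays only the loop containing a changed block instead of repeating the whole traversal. GVN and class-init-check elimination are switched to this iterator, and GVN's convergence guard changes from a fixed repeat count to a cap on the number of basic blocks processed. For orientation, a repeating pass is expected to drive the new iterator roughly as follows. This is a sketch assembled from the GVN worker and the tests in this patch, not code taken from the patch itself; the CompilationUnit `cu` and a previously computed topological sort order (via ComputeTopologicalSortOrder()) are assumed to exist.

```cpp
// Sketch: driving LoopRepeatingTopologicalSortIterator from a repeating pass.
// Follows the shape of ApplyGlobalValueNumbering()/DoPerformGVN() in this patch.
ScopedArenaAllocator allocator(&cu->arena_stack);
GlobalValueNumbering gvn(cu, &allocator);
LoopRepeatingTopologicalSortIterator iter(cu->mir_graph.get());
bool change = false;
for (BasicBlock* bb = iter.Next(change); bb != nullptr; bb = iter.Next(change)) {
  // Passing the previous block's 'change' into Next() clears the visited flags
  // of that block's children, which is what makes the enclosing loop replay.
  LocalValueNumbering* lvn = gvn.PrepareBasicBlock(bb);
  if (lvn != nullptr) {
    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
      lvn->GetValueNumber(mir);
    }
  }
  change = (lvn != nullptr) && gvn.FinishBasicBlock(bb);
}
// After convergence, gvn.Good() reports whether value names overflowed and
// gvn.AllowModifications() enables the subsequent elimination walk over the final LVNs.
```

Unlike the old RepeatingTopologicalSortIterator path, a change here does not trigger another sweep over the entire order; the iterator only pops back to the head of the innermost loop whose blocks were invalidated.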
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index d75644d8ff..2cba0ec9ce 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -127,6 +127,7 @@ COMPILER_GTEST_COMMON_SRC_FILES := \ runtime/reflection_test.cc \ compiler/dex/global_value_numbering_test.cc \ compiler/dex/local_value_numbering_test.cc \ + compiler/dex/mir_graph_test.cc \ compiler/dex/mir_optimization_test.cc \ compiler/driver/compiler_driver_test.cc \ compiler/elf_writer_test.cc \ diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index d1d5ad9715..7395324ea4 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -172,7 +172,7 @@ class NullCheckEliminationAndTypeInference : public PassME { class ClassInitCheckElimination : public PassME { public: ClassInitCheckElimination() - : PassME("ClInitCheckElimination", kRepeatingTopologicalSortTraversal) { + : PassME("ClInitCheckElimination", kLoopRepeatingTopologicalSortTraversal) { } bool Gate(const PassDataHolder* data) const { @@ -207,17 +207,17 @@ class ClassInitCheckElimination : public PassME { class GlobalValueNumberingPass : public PassME { public: GlobalValueNumberingPass() - : PassME("GVN", kRepeatingTopologicalSortTraversal, "4_post_gvn_cfg") { + : PassME("GVN", kLoopRepeatingTopologicalSortTraversal, "4_post_gvn_cfg") { } - bool Gate(const PassDataHolder* data) const { + bool Gate(const PassDataHolder* data) const OVERRIDE { DCHECK(data != nullptr); CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); return cUnit->mir_graph->ApplyGlobalValueNumberingGate(); } - bool Worker(const PassDataHolder* data) const { + bool Worker(const PassDataHolder* data) const OVERRIDE { DCHECK(data != nullptr); const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); CompilationUnit* cUnit = pass_me_data_holder->c_unit; @@ -227,7 +227,7 @@ class GlobalValueNumberingPass : public PassME { return cUnit->mir_graph->ApplyGlobalValueNumbering(bb); } - void End(PassDataHolder* data) const { + void End(PassDataHolder* data) const OVERRIDE { DCHECK(data != nullptr); CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(cUnit != nullptr); diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h index f8b9c1a952..d1abf7fa12 100644 --- a/compiler/dex/dataflow_iterator-inl.h +++ b/compiler/dex/dataflow_iterator-inl.h @@ -121,6 +121,56 @@ inline BasicBlock* AllNodesIterator::Next(bool had_change) { return res; } +inline BasicBlock* LoopRepeatingTopologicalSortIterator::Next(bool had_change) { + if (idx_ != 0) { + // Mark last processed block visited. + BasicBlock* bb = mir_graph_->GetBasicBlock(block_id_list_->Get(idx_ - 1)); + bb->visited = true; + if (had_change) { + // If we had a change we need to revisit the children. + ChildBlockIterator iter(bb, mir_graph_); + for (BasicBlock* child_bb = iter.Next(); child_bb != nullptr; child_bb = iter.Next()) { + child_bb->visited = false; + } + } + } + + while (true) { + // Pop loops we have left and check if we need to recalculate one of them. + // NOTE: We need to do this even if idx_ == end_idx_. 
+ while (loop_head_stack_->Size() != 0u && + loop_ends_->Get(loop_head_stack_->Peek().first) == idx_) { + auto top = loop_head_stack_->Peek(); + uint16_t loop_head_idx = top.first; + bool recalculated = top.second; + loop_head_stack_->Pop(); + BasicBlock* loop_head = mir_graph_->GetBasicBlock(block_id_list_->Get(loop_head_idx)); + DCHECK(loop_head != nullptr); + if (!recalculated || !loop_head->visited) { + loop_head_stack_->Insert(std::make_pair(loop_head_idx, true)); // Recalculating this loop. + idx_ = loop_head_idx + 1; + return loop_head; + } + } + + if (idx_ == end_idx_) { + return nullptr; + } + + // Get next block and return it if unvisited. + BasicBlockId idx = idx_; + idx_ += 1; + BasicBlock* bb = mir_graph_->GetBasicBlock(block_id_list_->Get(idx)); + DCHECK(bb != nullptr); + if (!bb->visited) { + if (loop_ends_->Get(idx) != 0u) { + loop_head_stack_->Insert(std::make_pair(idx, false)); // Not recalculating. + } + return bb; + } + } +} + } // namespace art #endif // ART_COMPILER_DEX_DATAFLOW_ITERATOR_INL_H_ diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h index 66c524fc98..06d6832340 100644 --- a/compiler/dex/dataflow_iterator.h +++ b/compiler/dex/dataflow_iterator.h @@ -388,6 +388,52 @@ namespace art { } }; + /** + * @class LoopRepeatingTopologicalSortIterator + * @brief Used to perform a Topological Sort Iteration of a MIRGraph, repeating loops as needed. + * @details The iterator uses the visited flags to keep track of the blocks that need + * recalculation and keeps a stack of loop heads in the MIRGraph. At the end of the loop + * it returns back to the loop head if it needs to be recalculated. Due to the use of + * the visited flags and the loop head stack in the MIRGraph, it's not possible to use + * two iterators at the same time or modify this data during iteration (though inspection + * of this data is allowed and sometimes even expected). + * + * NOTE: This iterator is not suitable for passes that need to propagate changes to + * predecessors, such as type inferrence. + */ + class LoopRepeatingTopologicalSortIterator : public DataflowIterator { + public: + /** + * @brief The constructor, using all of the reachable blocks of the MIRGraph. + * @param mir_graph The MIRGraph considered. + */ + explicit LoopRepeatingTopologicalSortIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()), + loop_ends_(mir_graph->GetTopologicalSortOrderLoopEnds()), + loop_head_stack_(mir_graph_->GetTopologicalSortOrderLoopHeadStack()) { + // Extra setup for RepeatingTopologicalSortIterator. + idx_ = start_idx_; + block_id_list_ = mir_graph->GetTopologicalSortOrder(); + // Clear visited flags and check that the loop head stack is empty. + mir_graph->ClearAllVisitedFlags(); + DCHECK_EQ(loop_head_stack_->Size(), 0u); + } + + ~LoopRepeatingTopologicalSortIterator() { + DCHECK_EQ(loop_head_stack_->Size(), 0u); + } + + /** + * @brief Get the next BasicBlock depending on iteration order. + * @param had_change did the user of the iteration change the previous BasicBlock. + * @return the next BasicBlock following the iteration order, 0 if finished. 
+ */ + virtual BasicBlock* Next(bool had_change = false) OVERRIDE; + + private: + const GrowableArray<BasicBlockId>* const loop_ends_; + GrowableArray<std::pair<uint16_t, bool>>* const loop_head_stack_; + }; } // namespace art diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc index 614e826fa6..d86be4e0e7 100644 --- a/compiler/dex/global_value_numbering.cc +++ b/compiler/dex/global_value_numbering.cc @@ -22,8 +22,10 @@ namespace art { GlobalValueNumbering::GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator) : cu_(cu), + mir_graph_(cu->mir_graph.get()), allocator_(allocator), - repeat_count_(0u), + bbs_processed_(0u), + max_bbs_to_process_(kMaxBbsToProcessMultiplyFactor * mir_graph_->GetNumReachableBlocks()), last_value_(0u), modifications_allowed_(false), global_value_map_(std::less<uint64_t>(), allocator->Adapter()), @@ -32,10 +34,9 @@ GlobalValueNumbering::GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAlloc array_location_map_(ArrayLocationComparator(), allocator->Adapter()), array_location_reverse_map_(allocator->Adapter()), ref_set_map_(std::less<ValueNameSet>(), allocator->Adapter()), - lvns_(cu_->mir_graph->GetNumBlocks(), nullptr, allocator->Adapter()), + lvns_(mir_graph_->GetNumBlocks(), nullptr, allocator->Adapter()), work_lvn_(nullptr), merge_lvns_(allocator->Adapter()) { - cu_->mir_graph->ClearAllVisitedFlags(); } GlobalValueNumbering::~GlobalValueNumbering() { @@ -46,21 +47,15 @@ LocalValueNumbering* GlobalValueNumbering::PrepareBasicBlock(BasicBlock* bb) { if (UNLIKELY(!Good())) { return nullptr; } - if (bb->data_flow_info == nullptr) { + if (UNLIKELY(bb->data_flow_info == nullptr)) { return nullptr; } - if (bb->block_type == kEntryBlock) { - repeat_count_ += 1u; - if (repeat_count_ > kMaxRepeatCount) { - last_value_ = kNoValue; // Make bad. - return nullptr; - } - } - if (bb->block_type == kExitBlock) { + if (UNLIKELY(bb->block_type == kExitBlock)) { DCHECK(bb->first_mir_insn == nullptr); return nullptr; } - if (bb->visited) { + if (UNLIKELY(bbs_processed_ == max_bbs_to_process_)) { + last_value_ = kNoValue; // Make bad. return nullptr; } DCHECK(work_lvn_.get() == nullptr); @@ -72,13 +67,34 @@ LocalValueNumbering* GlobalValueNumbering::PrepareBasicBlock(BasicBlock* bb) { work_lvn_->SetSRegNullChecked(this_reg); } } else { - // Merge all incoming arcs. // To avoid repeated allocation on the ArenaStack, reuse a single vector kept as a member. DCHECK(merge_lvns_.empty()); + // If we're running the full GVN, the RepeatingTopologicalSortIterator keeps the loop + // head stack in the MIRGraph up to date and for a loop head we need to check whether + // we're making the initial computation and need to merge only preceding blocks in the + // topological order, or we're recalculating a loop head and need to merge all incoming + // LVNs. When we're not at a loop head (including having an empty loop head stack) all + // predecessors should be preceding blocks and we shall merge all of them anyway. + // + // If we're running the modification phase of the full GVN, the loop head stack will be + // empty and we need to merge all incoming LVNs. If we're running just a simple LVN, + // the loop head stack will also be empty and there will be nothing to merge anyway. + bool use_all_predecessors = true; + uint16_t loop_head_idx = 0u; // Used only if !use_all_predecessors. + if (mir_graph_->GetTopologicalSortOrderLoopHeadStack()->Size() != 0) { + // Full GVN inside a loop, see if we're at the loop head for the first time. 
+ auto top = mir_graph_->GetTopologicalSortOrderLoopHeadStack()->Peek(); + loop_head_idx = top.first; + bool recalculating = top.second; + use_all_predecessors = recalculating || + loop_head_idx != mir_graph_->GetTopologicalSortOrderIndexes()->Get(bb->id); + } GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); - for (BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(iter.Next()); - pred_bb != nullptr; pred_bb = cu_->mir_graph->GetBasicBlock(iter.Next())) { - if (lvns_[pred_bb->id] != nullptr) { + for (BasicBlock* pred_bb = mir_graph_->GetBasicBlock(iter.Next()); + pred_bb != nullptr; pred_bb = mir_graph_->GetBasicBlock(iter.Next())) { + if (lvns_[pred_bb->id] != nullptr && + (use_all_predecessors || + mir_graph_->GetTopologicalSortOrderIndexes()->Get(pred_bb->id) < loop_head_idx)) { merge_lvns_.push_back(lvns_[pred_bb->id]); } } @@ -87,19 +103,22 @@ LocalValueNumbering* GlobalValueNumbering::PrepareBasicBlock(BasicBlock* bb) { if (bb->catch_entry) { merge_type = LocalValueNumbering::kCatchMerge; } else if (bb->last_mir_insn != nullptr && - (bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN || + (bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_VOID || + bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN || bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_OBJECT || bb->last_mir_insn->dalvikInsn.opcode == Instruction::RETURN_WIDE) && (bb->first_mir_insn == bb->last_mir_insn || - (bb->first_mir_insn->next == bb->last_mir_insn && - static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpPhi))) { + (static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpPhi && + (bb->first_mir_insn->next == bb->last_mir_insn || + (static_cast<int>(bb->first_mir_insn->next->dalvikInsn.opcode) == kMirOpPhi && + bb->first_mir_insn->next->next == bb->last_mir_insn))))) { merge_type = LocalValueNumbering::kReturnMerge; } // At least one predecessor must have been processed before this bb. CHECK(!merge_lvns_.empty()); if (merge_lvns_.size() == 1u) { work_lvn_->MergeOne(*merge_lvns_[0], merge_type); - BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(merge_lvns_[0]->Id()); + BasicBlock* pred_bb = mir_graph_->GetBasicBlock(merge_lvns_[0]->Id()); if (HasNullCheckLastInsn(pred_bb, bb->id)) { work_lvn_->SetSRegNullChecked(pred_bb->last_mir_insn->ssa_rep->uses[0]); } @@ -112,32 +131,13 @@ LocalValueNumbering* GlobalValueNumbering::PrepareBasicBlock(BasicBlock* bb) { bool GlobalValueNumbering::FinishBasicBlock(BasicBlock* bb) { DCHECK(work_lvn_ != nullptr); - DCHECK(bb->id == work_lvn_->Id()); + DCHECK_EQ(bb->id, work_lvn_->Id()); + ++bbs_processed_; merge_lvns_.clear(); - bool change = false; - // Look for a branch to self or an already processed child. - // (No need to repeat the LVN if all children are processed later.) - ChildBlockIterator iter(bb, cu_->mir_graph.get()); - for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) { - if (child == bb || lvns_[child->id] != nullptr) { - // If we found an already processed child, check if the LVN actually differs. 
- change = (lvns_[bb->id] == nullptr || !lvns_[bb->id]->Equals(*work_lvn_)); - break; - } - } - std::unique_ptr<const LocalValueNumbering> old_lvn(lvns_[bb->id]); lvns_[bb->id] = work_lvn_.release(); - - bb->visited = true; - if (change) { - ChildBlockIterator iter(bb, cu_->mir_graph.get()); - for (BasicBlock* child = iter.Next(); child != nullptr; child = iter.Next()) { - child->visited = false; - } - } - return change; + return (old_lvn == nullptr) || !old_lvn->Equals(*lvns_[bb->id]); } uint16_t GlobalValueNumbering::GetFieldId(const MirFieldInfo& field_info, uint16_t type) { @@ -188,7 +188,7 @@ bool GlobalValueNumbering::NullCheckedInAllPredecessors( uint16_t value_name = merge_names[i]; if (!pred_lvn->IsValueNullChecked(value_name)) { // Check if the predecessor has an IF_EQZ/IF_NEZ as the last insn. - const BasicBlock* pred_bb = cu_->mir_graph->GetBasicBlock(pred_lvn->Id()); + const BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_lvn->Id()); if (!HasNullCheckLastInsn(pred_bb, work_lvn_->Id())) { return false; } diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h index 7ab77b733f..a12a7791ab 100644 --- a/compiler/dex/global_value_numbering.h +++ b/compiler/dex/global_value_numbering.h @@ -31,7 +31,10 @@ class GlobalValueNumbering { GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator); ~GlobalValueNumbering(); + // Prepare LVN for the basic block. LocalValueNumbering* PrepareBasicBlock(BasicBlock* bb); + + // Finish processing the basic block. bool FinishBasicBlock(BasicBlock* bb); // Checks that the value names didn't overflow. @@ -42,7 +45,6 @@ class GlobalValueNumbering { // Allow modifications. void AllowModifications() { DCHECK(Good()); - cu_->mir_graph->ClearAllVisitedFlags(); modifications_allowed_ = true; } @@ -182,7 +184,7 @@ class GlobalValueNumbering { } const BasicBlock* GetBasicBlock(uint16_t bb_id) const { - return cu_->mir_graph->GetBasicBlock(bb_id); + return mir_graph_->GetBasicBlock(bb_id); } static bool HasNullCheckLastInsn(const BasicBlock* pred_bb, BasicBlockId succ_id); @@ -194,7 +196,7 @@ class GlobalValueNumbering { } MIRGraph* GetMirGraph() const { - return cu_->mir_graph.get(); + return mir_graph_; } ScopedArenaAllocator* Allocator() const { @@ -202,12 +204,16 @@ class GlobalValueNumbering { } CompilationUnit* const cu_; + MIRGraph* mir_graph_; ScopedArenaAllocator* const allocator_; - static constexpr uint32_t kMaxRepeatCount = 10u; + // The number of BBs that we need to process grows exponentially with the number + // of nested loops. Don't allow excessive processing for too many nested loops or + // otherwise expensive methods. + static constexpr uint32_t kMaxBbsToProcessMultiplyFactor = 20u; - // Track the repeat count to make sure the GVN converges quickly and abort the GVN otherwise. - uint32_t repeat_count_; + uint32_t bbs_processed_; + uint32_t max_bbs_to_process_; // We have 32-bit last_value_ so that we can detect when we run out of value names, see Good(). // We usually don't check Good() until the end of LVN unless we're about to modify code. 
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc index 18adbabdf7..c82d2310c9 100644 --- a/compiler/dex/global_value_numbering_test.cc +++ b/compiler/dex/global_value_numbering_test.cc @@ -273,23 +273,20 @@ class GlobalValueNumberingTest : public testing::Test { } void PerformGVN() { - cu_.mir_graph->SSATransformationStart(); - cu_.mir_graph->ComputeDFSOrders(); - cu_.mir_graph->ComputeDominators(); - cu_.mir_graph->ComputeTopologicalSortOrder(); - cu_.mir_graph->SSATransformationEnd(); - DoPerformGVN<RepeatingPreOrderDfsIterator>(); + DoPerformGVN<LoopRepeatingTopologicalSortIterator>(); } void PerformPreOrderDfsGVN() { - cu_.mir_graph->SSATransformationStart(); - cu_.mir_graph->ComputeDFSOrders(); - cu_.mir_graph->SSATransformationEnd(); DoPerformGVN<RepeatingPreOrderDfsIterator>(); } template <typename IteratorType> void DoPerformGVN() { + cu_.mir_graph->SSATransformationStart(); + cu_.mir_graph->ComputeDFSOrders(); + cu_.mir_graph->ComputeDominators(); + cu_.mir_graph->ComputeTopologicalSortOrder(); + cu_.mir_graph->SSATransformationEnd(); ASSERT_TRUE(gvn_ == nullptr); gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get())); ASSERT_FALSE(gvn_->CanModify()); @@ -313,7 +310,7 @@ class GlobalValueNumberingTest : public testing::Test { ASSERT_TRUE(gvn_->Good()); ASSERT_FALSE(gvn_->CanModify()); gvn_->AllowModifications(); - PreOrderDfsIterator iterator(cu_.mir_graph.get()); + TopologicalSortIterator iterator(cu_.mir_graph.get()); for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb); if (lvn != nullptr) { @@ -340,7 +337,6 @@ class GlobalValueNumberingTest : public testing::Test { cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); cu_.access_flags = kAccStatic; // Don't let "this" interfere with this test. allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); - // gvn_->AllowModifications(); } ArenaPool pool_; @@ -1917,7 +1913,7 @@ TEST_F(GlobalValueNumberingTest, InfiniteLocationLoop) { PerformPreOrderDfsGVN(); } -TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_IFieldAndPhi) { +TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, IFieldAndPhi) { static const IFieldDef ifields[] = { { 0u, 1u, 0u, false }, // Int. }; @@ -1954,7 +1950,7 @@ TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_IFieldAndPhi) { EXPECT_EQ(value_names_[5], value_names_[12]); } -TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_NullCheck) { +TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, NullCheck) { static const IFieldDef ifields[] = { { 0u, 1u, 0u, false }, // Int. 
}; @@ -2024,14 +2020,10 @@ TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_NullCheck) { EXPECT_NE(value_names_[2], value_names_[6]); EXPECT_NE(value_names_[3], value_names_[7]); EXPECT_NE(value_names_[4], value_names_[8]); - EXPECT_NE(value_names_[0], value_names_[12]); - EXPECT_NE(value_names_[1], value_names_[13]); - EXPECT_NE(value_names_[2], value_names_[14]); - EXPECT_NE(value_names_[3], value_names_[15]); EXPECT_EQ(value_names_[4], value_names_[12]); - EXPECT_NE(value_names_[5], value_names_[13]); - EXPECT_NE(value_names_[6], value_names_[14]); - EXPECT_NE(value_names_[7], value_names_[15]); + EXPECT_EQ(value_names_[5], value_names_[13]); + EXPECT_EQ(value_names_[6], value_names_[14]); + EXPECT_EQ(value_names_[7], value_names_[15]); EXPECT_EQ(value_names_[12], value_names_[20]); EXPECT_EQ(value_names_[13], value_names_[21]); EXPECT_EQ(value_names_[14], value_names_[22]); @@ -2049,7 +2041,7 @@ TEST_F(GlobalValueNumberingTestTwoConsecutiveLoops, DISABLED_NullCheck) { } } -TEST_F(GlobalValueNumberingTestTwoNestedLoops, DISABLED_IFieldAndPhi) { +TEST_F(GlobalValueNumberingTestTwoNestedLoops, IFieldAndPhi) { static const IFieldDef ifields[] = { { 0u, 1u, 0u, false }, // Int. }; @@ -2097,7 +2089,7 @@ TEST_F(GlobalValueNumberingTest, NormalPathToCatchEntry) { static const BBDef bbs[] = { DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), - DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(4)), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)), DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)), DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(3)), DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(3, 4)), @@ -2108,6 +2100,15 @@ TEST_F(GlobalValueNumberingTest, NormalPathToCatchEntry) { PrepareBasicBlocks(bbs); BasicBlock* catch_handler = cu_.mir_graph->GetBasicBlock(5u); catch_handler->catch_entry = true; + // Add successor block info to the check block. + BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u); + check_bb->successor_block_list_type = kCatch; + check_bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>( + &cu_.arena, 2, kGrowableArraySuccessorBlocks); + SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*> + (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); + successor_block_info->block = catch_handler->id; + check_bb->successor_blocks->Insert(successor_block_info); BasicBlock* merge_block = cu_.mir_graph->GetBasicBlock(4u); std::swap(merge_block->taken, merge_block->fall_through); PrepareMIRs(mirs); diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc index ef893fe0ba..0e072ecb6c 100644 --- a/compiler/dex/local_value_numbering.cc +++ b/compiler/dex/local_value_numbering.cc @@ -534,6 +534,10 @@ void LocalValueNumbering::InPlaceIntersectMaps(Map* work_map, const Map& other_m (!cmp(entry, *work_it) && !(work_it->second == entry.second)))) { work_it = work_map->erase(work_it); } + if (work_it == work_end) { + return; + } + ++work_it; } } @@ -855,13 +859,18 @@ void LocalValueNumbering::Merge(MergeType merge_type) { MergeMemoryVersions(merge_type == kCatchMerge); // Merge non-aliasing maps/sets. 
- MergeSets<IFieldLocToValueMap, &LocalValueNumbering::non_aliasing_ifield_value_map_, - &LocalValueNumbering::MergeNonAliasingIFieldValues>(); - MergeSets<NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_, - &LocalValueNumbering::MergeAliasingValues< - NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_, - NonAliasingArrayVersions>>(); IntersectSets<ValueNameSet, &LocalValueNumbering::non_aliasing_refs_>(); + if (!non_aliasing_refs_.empty() && merge_type == kCatchMerge) { + PruneNonAliasingRefsForCatch(); + } + if (!non_aliasing_refs_.empty()) { + MergeSets<IFieldLocToValueMap, &LocalValueNumbering::non_aliasing_ifield_value_map_, + &LocalValueNumbering::MergeNonAliasingIFieldValues>(); + MergeSets<NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_, + &LocalValueNumbering::MergeAliasingValues< + NonAliasingArrayValuesMap, &LocalValueNumbering::non_aliasing_array_value_map_, + NonAliasingArrayVersions>>(); + } // We won't do anything complicated for range checks, just calculate the intersection. IntersectSets<RangeCheckSet, &LocalValueNumbering::range_checked_>(); @@ -872,7 +881,6 @@ void LocalValueNumbering::Merge(MergeType merge_type) { if (merge_type == kCatchMerge) { // Memory is clobbered. New memory version already created, don't merge aliasing locations. - PruneNonAliasingRefsForCatch(); return; } @@ -1361,8 +1369,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::MONITOR_EXIT: HandleNullCheck(mir, GetOperandValue(mir->ssa_rep->uses[0])); // If we're running GVN and CanModify(), uneliminated null check indicates bytecode error. - if ((gvn_->cu_->disable_opt & (1 << kGlobalValueNumbering)) == 0 && gvn_->CanModify() && - (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) { + if ((gvn_->GetCompilationUnit()->disable_opt & (1u << kGlobalValueNumbering)) == 0u && + gvn_->CanModify() && (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0) { LOG(WARNING) << "Bytecode error: MONITOR_EXIT is still null checked at 0x" << std::hex << mir->offset << " in " << PrettyMethod(gvn_->cu_->method_idx, *gvn_->cu_->dex_file); } diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 1c8a9b5079..331af21eda 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -84,6 +84,9 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) dfs_post_order_(NULL), dom_post_order_traversal_(NULL), topological_order_(nullptr), + topological_order_loop_ends_(nullptr), + topological_order_indexes_(nullptr), + topological_order_loop_head_stack_(nullptr), i_dom_list_(NULL), def_block_matrix_(NULL), temp_scoped_alloc_(), @@ -1526,117 +1529,248 @@ void MIRGraph::SSATransformationEnd() { temp_scoped_alloc_.reset(); } -void MIRGraph::ComputeTopologicalSortOrder() { - // Clear the nodes. - ClearAllVisitedFlags(); +static BasicBlock* SelectTopologicalSortOrderFallBack( + MIRGraph* mir_graph, const ArenaBitVector* current_loop, + const ScopedArenaVector<size_t>* visited_cnt_values, ScopedArenaAllocator* allocator, + ScopedArenaVector<BasicBlockId>* tmp_stack) { + // No true loop head has been found but there may be true loop heads after the mess we need + // to resolve. To avoid taking one of those, pick the candidate with the highest number of + // reachable unvisited nodes. That candidate will surely be a part of a loop. 
+ BasicBlock* fall_back = nullptr; + size_t fall_back_num_reachable = 0u; + // Reuse the same bit vector for each candidate to mark reachable unvisited blocks. + ArenaBitVector candidate_reachable(allocator, mir_graph->GetNumBlocks(), false, kBitMapMisc); + AllNodesIterator iter(mir_graph); + for (BasicBlock* candidate = iter.Next(); candidate != nullptr; candidate = iter.Next()) { + if (candidate->hidden || // Hidden, or + candidate->visited || // already processed, or + (*visited_cnt_values)[candidate->id] == 0u || // no processed predecessors, or + (current_loop != nullptr && // outside current loop. + !current_loop->IsBitSet(candidate->id))) { + continue; + } + DCHECK(tmp_stack->empty()); + tmp_stack->push_back(candidate->id); + candidate_reachable.ClearAllBits(); + size_t num_reachable = 0u; + while (!tmp_stack->empty()) { + BasicBlockId current_id = tmp_stack->back(); + tmp_stack->pop_back(); + BasicBlock* current_bb = mir_graph->GetBasicBlock(current_id); + DCHECK(current_bb != nullptr); + ChildBlockIterator child_iter(current_bb, mir_graph); + BasicBlock* child_bb = child_iter.Next(); + for ( ; child_bb != nullptr; child_bb = child_iter.Next()) { + DCHECK(!child_bb->hidden); + if (child_bb->visited || // Already processed, or + (current_loop != nullptr && // outside current loop. + !current_loop->IsBitSet(child_bb->id))) { + continue; + } + if (!candidate_reachable.IsBitSet(child_bb->id)) { + candidate_reachable.SetBit(child_bb->id); + tmp_stack->push_back(child_bb->id); + num_reachable += 1u; + } + } + } + if (fall_back_num_reachable < num_reachable) { + fall_back_num_reachable = num_reachable; + fall_back = candidate; + } + } + return fall_back; +} - // Create the topological order if need be. - if (topological_order_ == nullptr) { - topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, GetNumBlocks()); +// Compute from which unvisited blocks is bb_id reachable through unvisited blocks. +static void ComputeUnvisitedReachableFrom(MIRGraph* mir_graph, BasicBlockId bb_id, + ArenaBitVector* reachable, + ScopedArenaVector<BasicBlockId>* tmp_stack) { + // NOTE: Loop heads indicated by the "visited" flag. + DCHECK(tmp_stack->empty()); + reachable->ClearAllBits(); + tmp_stack->push_back(bb_id); + while (!tmp_stack->empty()) { + BasicBlockId current_id = tmp_stack->back(); + tmp_stack->pop_back(); + BasicBlock* current_bb = mir_graph->GetBasicBlock(current_id); + DCHECK(current_bb != nullptr); + GrowableArray<BasicBlockId>::Iterator iter(current_bb->predecessors); + BasicBlock* pred_bb = mir_graph->GetBasicBlock(iter.Next()); + for ( ; pred_bb != nullptr; pred_bb = mir_graph->GetBasicBlock(iter.Next())) { + if (!pred_bb->visited && !reachable->IsBitSet(pred_bb->id)) { + reachable->SetBit(pred_bb->id); + tmp_stack->push_back(pred_bb->id); + } + } } - topological_order_->Reset(); +} +void MIRGraph::ComputeTopologicalSortOrder() { ScopedArenaAllocator allocator(&cu_->arena_stack); + unsigned int num_blocks = GetNumBlocks(); + ScopedArenaQueue<BasicBlock*> q(allocator.Adapter()); - ScopedArenaVector<size_t> visited_cnt_values(GetNumBlocks(), 0u, allocator.Adapter()); + ScopedArenaVector<size_t> visited_cnt_values(num_blocks, 0u, allocator.Adapter()); + ScopedArenaVector<BasicBlockId> loop_head_stack(allocator.Adapter()); + size_t max_nested_loops = 0u; + ArenaBitVector loop_exit_blocks(&allocator, num_blocks, false, kBitMapMisc); + loop_exit_blocks.ClearAllBits(); - // Set up visitedCntValues map for all BB. The default value for this counters in the map is zero. 
- // also fill initial queue. + // Count the number of blocks to process and add the entry block(s). GrowableArray<BasicBlock*>::Iterator iterator(&block_list_); - - size_t num_blocks = 0u; - while (true) { - BasicBlock* bb = iterator.Next(); - - if (bb == nullptr) { - break; - } - + unsigned int num_blocks_to_process = 0u; + for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { if (bb->hidden == true) { continue; } - num_blocks += 1u; - size_t unvisited_predecessor_count = bb->predecessors->Size(); - - GrowableArray<BasicBlockId>::Iterator pred_iterator(bb->predecessors); - // To process loops we should not wait for dominators. - while (true) { - BasicBlock* pred_bb = GetBasicBlock(pred_iterator.Next()); - - if (pred_bb == nullptr) { - break; - } - - // Skip the backward branch or hidden predecessor. - if (pred_bb->hidden || - (pred_bb->dominators != nullptr && pred_bb->dominators->IsBitSet(bb->id))) { - unvisited_predecessor_count -= 1u; - } - } - - visited_cnt_values[bb->id] = unvisited_predecessor_count; + num_blocks_to_process += 1u; - // Add entry block to queue. - if (unvisited_predecessor_count == 0) { + if (bb->predecessors->Size() == 0u) { + // Add entry block to the queue. q.push(bb); } } - // We can get a cycle where none of the blocks dominates the other. Therefore don't - // stop when the queue is empty, continue until we've processed all the blocks. - AllNodesIterator candidate_iter(this); // For the empty queue case. - while (num_blocks != 0u) { - num_blocks -= 1u; + // Create the topological order if need be. + if (topological_order_ == nullptr) { + topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, num_blocks); + topological_order_loop_ends_ = new (arena_) GrowableArray<uint16_t>(arena_, num_blocks); + topological_order_indexes_ = new (arena_) GrowableArray<uint16_t>(arena_, num_blocks); + } + topological_order_->Reset(); + topological_order_loop_ends_->Reset(); + topological_order_indexes_->Reset(); + topological_order_loop_ends_->Resize(num_blocks); + topological_order_indexes_->Resize(num_blocks); + for (BasicBlockId i = 0; i != num_blocks; ++i) { + topological_order_loop_ends_->Insert(0u); + topological_order_indexes_->Insert(static_cast<uint16_t>(-1)); + } + + // Mark all blocks as unvisited. + ClearAllVisitedFlags(); + + // For loop heads, keep track from which blocks they are reachable not going through other + // loop heads. Other loop heads are excluded to detect the heads of nested loops. The children + // in this set go into the loop body, the other children are jumping over the loop. + ScopedArenaVector<ArenaBitVector*> loop_head_reachable_from(allocator.Adapter()); + loop_head_reachable_from.resize(num_blocks, nullptr); + // Reuse the same temp stack whenever calculating a loop_head_reachable_from[loop_head_id]. + ScopedArenaVector<BasicBlockId> tmp_stack(allocator.Adapter()); + + while (num_blocks_to_process != 0u) { BasicBlock* bb = nullptr; if (!q.empty()) { + num_blocks_to_process -= 1u; // Get top. bb = q.front(); q.pop(); + if (bb->visited) { + // Loop head: it was already processed, mark end and copy exit blocks to the queue. + DCHECK(q.empty()) << PrettyMethod(cu_->method_idx, *cu_->dex_file); + uint16_t idx = static_cast<uint16_t>(topological_order_->Size()); + topological_order_loop_ends_->Put(topological_order_indexes_->Get(bb->id), idx); + DCHECK_EQ(loop_head_stack.back(), bb->id); + loop_head_stack.pop_back(); + ArenaBitVector* reachable = + loop_head_stack.empty() ? 
nullptr : loop_head_reachable_from[loop_head_stack.back()]; + for (BasicBlockId candidate_id : loop_exit_blocks.Indexes()) { + if (reachable == nullptr || reachable->IsBitSet(candidate_id)) { + q.push(GetBasicBlock(candidate_id)); + // NOTE: The BitVectorSet::IndexIterator will not check the pointed-to bit again, + // so clearing the bit has no effect on the iterator. + loop_exit_blocks.ClearBit(candidate_id); + } + } + continue; + } } else { - // Find some block we didn't visit yet that has at least one visited predecessor. - while (bb == nullptr) { - BasicBlock* candidate = candidate_iter.Next(); - DCHECK(candidate != nullptr); - if (candidate->visited || candidate->hidden) { + // Find the new loop head. + AllNodesIterator iter(this); + while (true) { + BasicBlock* candidate = iter.Next(); + if (candidate == nullptr) { + // We did not find a true loop head, fall back to a reachable block in any loop. + ArenaBitVector* current_loop = + loop_head_stack.empty() ? nullptr : loop_head_reachable_from[loop_head_stack.back()]; + bb = SelectTopologicalSortOrderFallBack(this, current_loop, &visited_cnt_values, + &allocator, &tmp_stack); + DCHECK(bb != nullptr) << PrettyMethod(cu_->method_idx, *cu_->dex_file); + if (kIsDebugBuild && cu_->dex_file != nullptr) { + LOG(INFO) << "Topological sort order: Using fall-back in " + << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " BB #" << bb->id + << " @0x" << std::hex << bb->start_offset + << ", num_blocks = " << std::dec << num_blocks; + } + break; + } + if (candidate->hidden || // Hidden, or + candidate->visited || // already processed, or + visited_cnt_values[candidate->id] == 0u || // no processed predecessors, or + (!loop_head_stack.empty() && // outside current loop. + !loop_head_reachable_from[loop_head_stack.back()]->IsBitSet(candidate->id))) { continue; } - GrowableArray<BasicBlockId>::Iterator iter(candidate->predecessors); - for (BasicBlock* pred_bb = GetBasicBlock(iter.Next()); pred_bb != nullptr; - pred_bb = GetBasicBlock(iter.Next())) { - if (!pred_bb->hidden && pred_bb->visited) { - bb = candidate; - break; + + GrowableArray<BasicBlockId>::Iterator pred_iter(candidate->predecessors); + BasicBlock* pred_bb = GetBasicBlock(pred_iter.Next()); + for ( ; pred_bb != nullptr; pred_bb = GetBasicBlock(pred_iter.Next())) { + if (pred_bb != candidate && !pred_bb->visited && + !pred_bb->dominators->IsBitSet(candidate->id)) { + break; // Keep non-null pred_bb to indicate failure. } } + if (pred_bb == nullptr) { + bb = candidate; + break; + } } + // Compute blocks from which the loop head is reachable and process those blocks first. + ArenaBitVector* reachable = + new (&allocator) ArenaBitVector(&allocator, num_blocks, false, kBitMapMisc); + loop_head_reachable_from[bb->id] = reachable; + ComputeUnvisitedReachableFrom(this, bb->id, reachable, &tmp_stack); + // Now mark as loop head. (Even if it's only a fall back when we don't find a true loop.) + loop_head_stack.push_back(bb->id); + max_nested_loops = std::max(max_nested_loops, loop_head_stack.size()); } DCHECK_EQ(bb->hidden, false); DCHECK_EQ(bb->visited, false); - - // We've visited all the predecessors. So, we can visit bb. bb->visited = true; // Now add the basic block. + uint16_t idx = static_cast<uint16_t>(topological_order_->Size()); + topological_order_indexes_->Put(bb->id, idx); topological_order_->Insert(bb->id); - // Reduce visitedCnt for all the successors and add into the queue ones with visitedCnt equals to zero. + // Update visited_cnt_values for children. 
ChildBlockIterator succIter(bb, this); BasicBlock* successor = succIter.Next(); for ( ; successor != nullptr; successor = succIter.Next()) { - if (successor->visited || successor->hidden) { + if (successor->hidden) { continue; } - // one more predecessor was visited. - DCHECK_NE(visited_cnt_values[successor->id], 0u); - visited_cnt_values[successor->id] -= 1u; - if (visited_cnt_values[successor->id] == 0u) { - q.push(successor); + // One more predecessor was visited. + visited_cnt_values[successor->id] += 1u; + if (visited_cnt_values[successor->id] == successor->predecessors->Size()) { + if (loop_head_stack.empty() || + loop_head_reachable_from[loop_head_stack.back()]->IsBitSet(successor->id)) { + q.push(successor); + } else { + DCHECK(!loop_exit_blocks.IsBitSet(successor->id)); + loop_exit_blocks.SetBit(successor->id); + } } } } + + // Prepare the loop head stack for iteration. + topological_order_loop_head_stack_ = + new (arena_) GrowableArray<std::pair<uint16_t, bool>>(arena_, max_nested_loops); } bool BasicBlock::IsExceptionBlock() const { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 79b3edf50e..768ae21c28 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -27,6 +27,7 @@ #include "mir_method_info.h" #include "utils/arena_bit_vector.h" #include "utils/growable_array.h" +#include "utils/scoped_arena_containers.h" #include "reg_location.h" #include "reg_storage.h" @@ -689,6 +690,21 @@ class MIRGraph { return topological_order_; } + GrowableArray<BasicBlockId>* GetTopologicalSortOrderLoopEnds() { + DCHECK(topological_order_loop_ends_ != nullptr); + return topological_order_loop_ends_; + } + + GrowableArray<BasicBlockId>* GetTopologicalSortOrderIndexes() { + DCHECK(topological_order_indexes_ != nullptr); + return topological_order_indexes_; + } + + GrowableArray<std::pair<uint16_t, bool>>* GetTopologicalSortOrderLoopHeadStack() { + DCHECK(topological_order_loop_head_stack_ != nullptr); + return topological_order_loop_head_stack_; + } + bool IsConst(int32_t s_reg) const { return is_constant_v_->IsBitSet(s_reg); } @@ -1132,6 +1148,14 @@ class MIRGraph { GrowableArray<BasicBlockId>* dfs_post_order_; GrowableArray<BasicBlockId>* dom_post_order_traversal_; GrowableArray<BasicBlockId>* topological_order_; + // Indexes in topological_order_ need to be only as big as the BasicBlockId. + COMPILE_ASSERT(sizeof(BasicBlockId) == sizeof(uint16_t), assuming_16_bit_BasicBlockId); + // For each loop head, remember the past-the-end index of the end of the loop. 0 if not loop head. + GrowableArray<uint16_t>* topological_order_loop_ends_; + // Map BB ids to topological_order_ indexes. 0xffff if not included (hidden or null block). + GrowableArray<uint16_t>* topological_order_indexes_; + // Stack of the loop head indexes and recalculation flags for RepeatingTopologicalSortIterator. + GrowableArray<std::pair<uint16_t, bool>>* topological_order_loop_head_stack_; int* i_dom_list_; ArenaBitVector** def_block_matrix_; // num_dalvik_register x num_blocks. 
std::unique_ptr<ScopedArenaAllocator> temp_scoped_alloc_; @@ -1177,6 +1201,7 @@ class MIRGraph { friend class ClassInitCheckEliminationTest; friend class GlobalValueNumberingTest; friend class LocalValueNumberingTest; + friend class TopologicalSortOrderTest; }; } // namespace art diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc new file mode 100644 index 0000000000..932f453b3c --- /dev/null +++ b/compiler/dex/mir_graph_test.cc @@ -0,0 +1,381 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mir_graph.h" +#include "gtest/gtest.h" + +namespace art { + +class TopologicalSortOrderTest : public testing::Test { + protected: + struct BBDef { + static constexpr size_t kMaxSuccessors = 4; + static constexpr size_t kMaxPredecessors = 4; + + BBType type; + size_t num_successors; + BasicBlockId successors[kMaxPredecessors]; + size_t num_predecessors; + BasicBlockId predecessors[kMaxPredecessors]; + }; + +#define DEF_SUCC0() \ + 0u, { } +#define DEF_SUCC1(s1) \ + 1u, { s1 } +#define DEF_SUCC2(s1, s2) \ + 2u, { s1, s2 } +#define DEF_SUCC3(s1, s2, s3) \ + 3u, { s1, s2, s3 } +#define DEF_SUCC4(s1, s2, s3, s4) \ + 4u, { s1, s2, s3, s4 } +#define DEF_PRED0() \ + 0u, { } +#define DEF_PRED1(p1) \ + 1u, { p1 } +#define DEF_PRED2(p1, p2) \ + 2u, { p1, p2 } +#define DEF_PRED3(p1, p2, p3) \ + 3u, { p1, p2, p3 } +#define DEF_PRED4(p1, p2, p3, p4) \ + 4u, { p1, p2, p3, p4 } +#define DEF_BB(type, succ, pred) \ + { type, succ, pred } + + void DoPrepareBasicBlocks(const BBDef* defs, size_t count) { + cu_.mir_graph->block_id_map_.clear(); + cu_.mir_graph->block_list_.Reset(); + ASSERT_LT(3u, count); // null, entry, exit and at least one bytecode block. + ASSERT_EQ(kNullBlock, defs[0].type); + ASSERT_EQ(kEntryBlock, defs[1].type); + ASSERT_EQ(kExitBlock, defs[2].type); + for (size_t i = 0u; i != count; ++i) { + const BBDef* def = &defs[i]; + BasicBlock* bb = cu_.mir_graph->NewMemBB(def->type, i); + cu_.mir_graph->block_list_.Insert(bb); + if (def->num_successors <= 2) { + bb->successor_block_list_type = kNotUsed; + bb->successor_blocks = nullptr; + bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u; + bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u; + } else { + bb->successor_block_list_type = kPackedSwitch; + bb->fall_through = 0u; + bb->taken = 0u; + bb->successor_blocks = new (&cu_.arena) GrowableArray<SuccessorBlockInfo*>( + &cu_.arena, def->num_successors, kGrowableArraySuccessorBlocks); + for (size_t j = 0u; j != def->num_successors; ++j) { + SuccessorBlockInfo* successor_block_info = + static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo), + kArenaAllocSuccessor)); + successor_block_info->block = j; + successor_block_info->key = 0u; // Not used by class init check elimination. 
+ bb->successor_blocks->Insert(successor_block_info); + } + } + bb->predecessors = new (&cu_.arena) GrowableArray<BasicBlockId>( + &cu_.arena, def->num_predecessors, kGrowableArrayPredecessors); + for (size_t j = 0u; j != def->num_predecessors; ++j) { + ASSERT_NE(0u, def->predecessors[j]); + bb->predecessors->Insert(def->predecessors[j]); + } + if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) { + bb->data_flow_info = static_cast<BasicBlockDataFlow*>( + cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); + } + } + cu_.mir_graph->num_blocks_ = count; + ASSERT_EQ(count, cu_.mir_graph->block_list_.Size()); + cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_.Get(1); + ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); + cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_.Get(2); + ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type); + } + + template <size_t count> + void PrepareBasicBlocks(const BBDef (&defs)[count]) { + DoPrepareBasicBlocks(defs, count); + } + + void ComputeTopologicalSortOrder() { + cu_.mir_graph->SSATransformationStart(); + cu_.mir_graph->ComputeDFSOrders(); + cu_.mir_graph->ComputeDominators(); + cu_.mir_graph->ComputeTopologicalSortOrder(); + cu_.mir_graph->SSATransformationEnd(); + ASSERT_NE(cu_.mir_graph->topological_order_, nullptr); + ASSERT_NE(cu_.mir_graph->topological_order_loop_ends_, nullptr); + ASSERT_NE(cu_.mir_graph->topological_order_indexes_, nullptr); + ASSERT_EQ(cu_.mir_graph->GetNumBlocks(), cu_.mir_graph->topological_order_indexes_->Size()); + for (size_t i = 0, size = cu_.mir_graph->GetTopologicalSortOrder()->Size(); i != size; ++i) { + ASSERT_LT(cu_.mir_graph->topological_order_->Get(i), cu_.mir_graph->GetNumBlocks()); + BasicBlockId id = cu_.mir_graph->topological_order_->Get(i); + EXPECT_EQ(i, cu_.mir_graph->topological_order_indexes_->Get(id)); + } + } + + void DoCheckOrder(const BasicBlockId* ids, size_t count) { + ASSERT_EQ(count, cu_.mir_graph->GetTopologicalSortOrder()->Size()); + for (size_t i = 0; i != count; ++i) { + EXPECT_EQ(ids[i], cu_.mir_graph->GetTopologicalSortOrder()->Get(i)) << i; + } + } + + template <size_t count> + void CheckOrder(const BasicBlockId (&ids)[count]) { + DoCheckOrder(ids, count); + } + + void DoCheckLoopEnds(const uint16_t* ends, size_t count) { + for (size_t i = 0; i != count; ++i) { + ASSERT_LT(i, cu_.mir_graph->GetTopologicalSortOrderLoopEnds()->Size()); + EXPECT_EQ(ends[i], cu_.mir_graph->GetTopologicalSortOrderLoopEnds()->Get(i)) << i; + } + } + + template <size_t count> + void CheckLoopEnds(const uint16_t (&ends)[count]) { + DoCheckLoopEnds(ends, count); + } + + TopologicalSortOrderTest() + : pool_(), + cu_(&pool_) { + cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); + } + + ArenaPool pool_; + CompilationUnit cu_; +}; + +TEST_F(TopologicalSortOrderTest, DoWhile) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)), // "taken" loops to self. 
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 5, 2 + }; + const uint16_t loop_ends[] = { + 0, 0, 3, 0, 0 + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); +} + +TEST_F(TopologicalSortOrderTest, While) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED2(1, 4)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(3)), // Loops to 3. + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 5, 2 + }; + const uint16_t loop_ends[] = { + 0, 3, 0, 0, 0 + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); +} + +TEST_F(TopologicalSortOrderTest, WhileWithTwoBackEdges) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED3(1, 4, 5)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 3), DEF_PRED1(3)), // Loops to 3. + DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(4)), // Loops to 3. + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 5, 6, 2 + }; + const uint16_t loop_ends[] = { + 0, 4, 0, 0, 0, 0 + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); +} + +TEST_F(TopologicalSortOrderTest, NestedLoop) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 7), DEF_PRED2(1, 6)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED2(3, 5)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)), // Loops to 4. + DEF_BB(kDalvikByteCode, DEF_SUCC1(3), DEF_PRED1(4)), // Loops to 3. + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 5, 6, 7, 2 + }; + const uint16_t loop_ends[] = { + 0, 5, 4, 0, 0, 0, 0 + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); +} + +TEST_F(TopologicalSortOrderTest, NestedLoopHeadLoops) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED2(1, 4)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 3), DEF_PRED2(3, 5)), // Nested head, loops to 3. + DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)), // Loops to 4. 
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 5, 6, 2 + }; + const uint16_t loop_ends[] = { + 0, 4, 4, 0, 0, 0 + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); +} + +TEST_F(TopologicalSortOrderTest, NestedLoopSameBackBranchBlock) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 6), DEF_PRED2(1, 5)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(3, 5)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 3), DEF_PRED1(4)), // Loops to 4 and 3. + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 5, 6, 2 + }; + const uint16_t loop_ends[] = { + 0, 4, 4, 0, 0, 0 + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); +} + +TEST_F(TopologicalSortOrderTest, TwoReorderedInnerLoops) { + // This is a simplified version of real code graph where the branch from 8 to 5 must prevent + // the block 5 from being considered a loop head before processing the loop 7-8. + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(9)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 9), DEF_PRED2(1, 5)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 7), DEF_PRED1(3)), // Branch over loop in 5. + DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 3), DEF_PRED3(4, 6, 8)), // Loops to 4; inner loop. + DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)), // Loops to 5. + DEF_BB(kDalvikByteCode, DEF_SUCC1(8), DEF_PRED2(4, 8)), // Loop head. + DEF_BB(kDalvikByteCode, DEF_SUCC2(7, 5), DEF_PRED1(7)), // Loops to 7; branches to 5. + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(3)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 7, 8, 5, 6, 9, 2 + }; + const uint16_t loop_ends[] = { + 0, 7, 0, 5, 0, 7, 0, 0, 0 + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); +} + +TEST_F(TopologicalSortOrderTest, NestedLoopWithBackEdgeAfterOuterLoopBackEdge) { + // This is a simplified version of real code graph. The back-edge from 7 to the inner + // loop head 4 comes after the back-edge from 6 to the outer loop head 3. To make this + // appear a bit more complex, there's also a back-edge from 5 to 4. + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED2(1, 6)), // Outer loop head. + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 6), DEF_PRED3(3, 5, 7)), // Inner loop head. + DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(4)), // Loops to inner loop head 4. + DEF_BB(kDalvikByteCode, DEF_SUCC2(7, 3), DEF_PRED1(4)), // Loops to outer loop head 3. + DEF_BB(kDalvikByteCode, DEF_SUCC2(2, 4), DEF_PRED1(6)), // Loops to inner loop head 4. + }; + const BasicBlockId expected_order[] = { + // NOTE: The 5 goes before 6 only because 5 is a "fall-through" from 4 while 6 is "taken". 
+ 1, 3, 4, 5, 6, 7, 2 + }; + const uint16_t loop_ends[] = { + 0, 6, 6, 0, 0, 0, 0 + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); +} + +TEST_F(TopologicalSortOrderTest, LoopWithTwoEntryPoints) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(7)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED1(1)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(3, 6)), // Fall-back block is chosen as + DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED2(3, 4)), // the earlier from these two. + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 7), DEF_PRED1(5)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(6)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 5, 6, 7, 2 + }; + const uint16_t loop_ends[] = { + 0, 0, 5, 0, 0, 0, 0 + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); +} + +} // namespace art diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index f69eea7fb0..5c98654c32 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -321,7 +321,7 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { return true; } // Don't do a separate LVN if we did the GVN. - bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1 << kGlobalValueNumbering)) != 0; + bool use_lvn = bb->use_lvn && (cu_->disable_opt & (1u << kGlobalValueNumbering)) != 0u; std::unique_ptr<ScopedArenaAllocator> allocator; std::unique_ptr<GlobalValueNumbering> global_valnum; std::unique_ptr<LocalValueNumbering> local_valnum; @@ -1139,11 +1139,7 @@ void MIRGraph::EliminateClassInitChecksEnd() { } bool MIRGraph::ApplyGlobalValueNumberingGate() { - if ((cu_->disable_opt & (1 << kGlobalValueNumbering)) != 0) { - return false; - } - - if ((merged_df_flags_ & DF_LVN) == 0) { + if ((cu_->disable_opt & (1u << kGlobalValueNumbering)) != 0u) { return false; } @@ -1179,7 +1175,7 @@ void MIRGraph::ApplyGlobalValueNumberingEnd() { lvn->GetValueNumber(mir); } bool change = temp_gvn_->FinishBasicBlock(bb); - DCHECK(!change); + DCHECK(!change) << PrettyMethod(cu_->method_idx, *cu_->dex_file); } } } else { diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 4a0cf5c78e..c510b528ff 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -195,7 +195,7 @@ class ClassInitCheckEliminationTest : public testing::Test { cu_.mir_graph->SSATransformationEnd(); bool gate_result = cu_.mir_graph->EliminateClassInitChecksGate(); ASSERT_TRUE(gate_result); - RepeatingTopologicalSortIterator iterator(cu_.mir_graph.get()); + LoopRepeatingTopologicalSortIterator iterator(cu_.mir_graph.get()); bool change = false; for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { change = cu_.mir_graph->EliminateClassInitChecks(bb); diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h index 031c5cf87d..133593ceb9 100644 --- a/compiler/dex/pass_driver_me.h +++ b/compiler/dex/pass_driver_me.h @@ -68,6 +68,9 @@ class PassDriverME: public PassDriver<PassDriverType> { case kRepeatingTopologicalSortTraversal: DoWalkBasicBlocks<RepeatingTopologicalSortIterator>(&pass_me_data_holder_, me_pass); break; + case kLoopRepeatingTopologicalSortTraversal: + DoWalkBasicBlocks<LoopRepeatingTopologicalSortIterator>(&pass_me_data_holder_, 
me_pass); + break; case kAllNodes: DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass); break; diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h index ff698654cf..c7276eb905 100644 --- a/compiler/dex/pass_me.h +++ b/compiler/dex/pass_me.h @@ -55,6 +55,7 @@ enum DataFlowAnalysisMode { kPostOrderDOMTraversal, /**< @brief Dominator tree / Post-Order. */ kTopologicalSortTraversal, /**< @brief Topological Order traversal. */ kRepeatingTopologicalSortTraversal, /**< @brief Repeating Topological Order traversal. */ + kLoopRepeatingTopologicalSortTraversal, /**< @brief Loop-repeating Topological Order traversal. */ kNoNodes, /**< @brief Skip BasicBlock traversal. */ }; diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index 892b30284f..4a3e071d1e 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -324,13 +324,15 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { } for (int i = 0; ssa_rep->fp_use && i< ssa_rep->num_uses; i++) { - if (ssa_rep->fp_use[i]) + if (ssa_rep->fp_use[i]) { changed |= SetFp(uses[i]); } + } for (int i = 0; ssa_rep->fp_def && i< ssa_rep->num_defs; i++) { - if (ssa_rep->fp_def[i]) + if (ssa_rep->fp_def[i]) { changed |= SetFp(defs[i]); } + } // Special-case handling for moves & Phi if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) { /* |
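As a concrete reading of the new side data, the DoWhile test above (entry block 1 → 3 → 4 → 5 → exit block 2, with block 4's "taken" edge looping back to itself) expects the values sketched below; the annotations simply restate the test's expectations and the field comments added to mir_graph.h.

```cpp
// topological_order_           = { 1, 3, 4, 5, 2 }  // block ids in visit order
// topological_order_loop_ends_ = { 0, 0, 3, 0, 0 }  // per position: block 4 at position 2
//                                                    // heads a loop that ends before
//                                                    // position 3; 0 means "not a loop head"
// topological_order_indexes_   : 1 -> 0, 3 -> 1, 4 -> 2, 5 -> 3, 2 -> 4
//                                                    // block id -> position; blocks not in
//                                                    // the order keep 0xffff
//
// During iteration, topological_order_loop_head_stack_ holds (head position, recalculating)
// pairs: reaching block 4 pushes (2, false); when idx_ hits the loop end (position 3) the
// entry is popped and, unless the loop was already recalculated and its head is still marked
// visited, (2, true) is pushed and block 4 is returned for recalculation. Within that replay
// the remaining loop blocks are skipped unless a change cleared their visited flag.
```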
