diff options
-rw-r--r-- | lib/Transforms/Scalar/LICM.cpp | 37 | ||||
-rw-r--r-- | test/Transforms/LICM/promote-order.ll | 41 |
2 files changed, 67 insertions, 11 deletions
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 582948ea14..0192e928fe 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -175,7 +175,9 @@ namespace { bool canSinkOrHoistInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - void PromoteAliasSet(AliasSet &AS); + void PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts); }; } @@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + SmallVector<BasicBlock *, 8> ExitBlocks; + SmallVector<Instruction *, 8> InsertPts; + // Loop over all of the alias sets in the tracker object. for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E; ++I) - PromoteAliasSet(*I); + PromoteAliasSet(*I, ExitBlocks, InsertPts); } // Clear out loops state information for the next iteration @@ -631,6 +636,7 @@ namespace { Value *SomePtr; // Designated pointer to store to. SmallPtrSet<Value*, 4> &PointerMustAliases; SmallVectorImpl<BasicBlock*> &LoopExitBlocks; + SmallVectorImpl<Instruction*> &LoopInsertPts; AliasSetTracker &AST; DebugLoc DL; int Alignment; @@ -638,11 +644,12 @@ namespace { LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, - SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, - DebugLoc dl, int alignment) + SmallVectorImpl<BasicBlock*> &LEB, + SmallVectorImpl<Instruction*> &LIP, + AliasSetTracker &ast, DebugLoc dl, int alignment) : LoadAndStorePromoter(Insts, S), SomePtr(SP), - PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), - Alignment(alignment) {} + PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), + AST(ast), DL(dl), Alignment(alignment) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { @@ -662,7 +669,7 @@ namespace { for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); + Instruction *InsertPos = LoopInsertPts[i]; StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); @@ -684,7 +691,9 @@ namespace { /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. /// -void LICM::PromoteAliasSet(AliasSet &AS) { +void LICM::PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. @@ -794,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); + // Figure out the loop exits and their insertion points, if this is the + // first promotion. + if (ExitBlocks.empty()) { + CurLoop->getUniqueExitBlocks(ExitBlocks); + InsertPts.resize(ExitBlocks.size()); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt(); + } // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST, DL, Alignment); + InsertPts, *CurAST, DL, Alignment); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. diff --git a/test/Transforms/LICM/promote-order.ll b/test/Transforms/LICM/promote-order.ll new file mode 100644 index 0000000000..b016265bbb --- /dev/null +++ b/test/Transforms/LICM/promote-order.ll @@ -0,0 +1,41 @@ +; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s + +; LICM should keep the stores in their original order when it sinks/promotes them. +; rdar://12045203 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@p = external global i8* + +define i32* @_Z4doiti(i32 %n, float* %tmp1, i32* %tmp3) nounwind { +entry: + %cmp1 = icmp slt i32 0, %n + br i1 %cmp1, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + store float 1.000000e+00, float* %tmp1, align 4, !tbaa !1 + store i32 1, i32* %tmp3, align 4, !tbaa !2 + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +; CHECK: for.cond.for.end_crit_edge: +; CHECK: store float 1.000000e+00, float* %tmp1 +; CHECK: store i32 1, i32* %tmp3 +for.cond.for.end_crit_edge: ; preds = %for.body + %split = phi i32* [ %tmp3, %for.body ] + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %r.0.lcssa = phi i32* [ %split, %for.cond.for.end_crit_edge ], [ undef, %entry ] + ret i32* %r.0.lcssa +} + +!0 = metadata !{metadata !"minimal TBAA"} +!1 = metadata !{metadata !"float", metadata !0} +!2 = metadata !{metadata !"int", metadata !0} |