SLP Vectorizer: Fix a bug in the code that does CSE on the generated gather sequences.

Make sure that we don't replace one gather sequence with another (via RAUW) unless the replacement dominates the sequence being replaced.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184674 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nadav Rotem <nrotem@apple.com> 2013-06-23 21:57:27 +0000
committer: Nadav Rotem <nrotem@apple.com> 2013-06-23 21:57:27 +0000
commit: 722b0a4d293b16eebaed94ae65d5f11743cbcea5 (patch)
tree: a8c2209653dfad4d2f5d9a5e8c0ad92e1878f454
parent: 787ad64b989937e0d79e176b0bf9af4a85a839d0 (diff)
download: external_llvm-722b0a4d293b16eebaed94ae65d5f11743cbcea5.tar.gz
external_llvm-722b0a4d293b16eebaed94ae65d5f11743cbcea5.tar.bz2
external_llvm-722b0a4d293b16eebaed94ae65d5f11743cbcea5.zip
2 files changed, 61 insertions, 4 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 838cb9599c..5bc3d852e7 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -127,8 +127,9 @@ public:
   static const int MAX_COST = INT_MIN;
 
   FuncSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl,
-          TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li) :
-    F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li),
+          TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li, 
+          DominatorTree *Dt) :
+    F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt),
     Builder(Se->getContext()) {
     for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
       BasicBlock *BB = it;
@@ -255,6 +256,7 @@ public:
   TargetTransformInfo *TTI;
   AliasAnalysis *AA;
   LoopInfo *LI;
+  DominatorTree *DT;
   /// Instruction builder to construct the vectorized tree.
   IRBuilder<> Builder;
 };
@@ -1197,7 +1199,8 @@ void FuncSLP::optimizeGatherSequence() {
      // visited instructions.
       for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
            ve = Visited.end(); v != ve; ++v) {
-        if (Insert->isIdenticalTo(*v)) {
+        if (Insert->isIdenticalTo(*v) &&
+          DT->dominates((*v)->getParent(), Insert->getParent())) {
           Insert->replaceAllUsesWith(*v);
           break;
         }
@@ -1224,6 +1227,7 @@ struct SLPVectorizer : public FunctionPass {
   TargetTransformInfo *TTI;
   AliasAnalysis *AA;
   LoopInfo *LI;
+  DominatorTree *DT;
 
   virtual bool runOnFunction(Function &F) {
     SE = &getAnalysis<ScalarEvolution>();
@@ -1231,6 +1235,7 @@ struct SLPVectorizer : public FunctionPass {
     TTI = &getAnalysis<TargetTransformInfo>();
     AA = &getAnalysis<AliasAnalysis>();
     LI = &getAnalysis<LoopInfo>();
+    DT = &getAnalysis<DominatorTree>();
 
     StoreRefs.clear();
     bool Changed = false;
@@ -1244,7 +1249,7 @@ struct SLPVectorizer : public FunctionPass {
 
     // Use the bollom up slp vectorizer to construct chains that start with
     // he store instructions.
-    FuncSLP R(&F, SE, DL, TTI, AA, LI);
+    FuncSLP R(&F, SE, DL, TTI, AA, LI, DT);
 
     for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
       BasicBlock *BB = it;
@@ -1274,6 +1279,7 @@ struct SLPVectorizer : public FunctionPass {
     AU.addRequired<AliasAnalysis>();
     AU.addRequired<TargetTransformInfo>();
     AU.addRequired<LoopInfo>();
+    AU.addRequired<DominatorTree>();
   }
 
 private:
diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll
index 6321b00768..d286798d70 100644
--- a/test/Transforms/SLPVectorizer/X86/cse.ll
+++ b/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -83,3 +83,54 @@ entry:
   ret i32 undef
 }
 
+; int test2(double *G, int k) {
+;   if (k) {
+;     G[0] = 1+G[5]*4;
+;     G[1] = 6+G[6]*3;
+;   } else {
+;     G[2] = 7+G[5]*4;
+;     G[3] = 8+G[6]*3;
+;   }
+; }
+
+; We can't merge the gather sequences because one does not dominate the other.
+; CHECK: test2
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: ret
+define i32 @test2(double* nocapture %G, i32 %k) {
+  %1 = icmp eq i32 %k, 0
+  %2 = getelementptr inbounds double* %G, i64 5
+  %3 = load double* %2, align 8
+  %4 = fmul double %3, 4.000000e+00
+  br i1 %1, label %12, label %5
+
+; <label>:5                                       ; preds = %0
+  %6 = fadd double %4, 1.000000e+00
+  store double %6, double* %G, align 8
+  %7 = getelementptr inbounds double* %G, i64 6
+  %8 = load double* %7, align 8
+  %9 = fmul double %8, 3.000000e+00
+  %10 = fadd double %9, 6.000000e+00
+  %11 = getelementptr inbounds double* %G, i64 1
+  store double %10, double* %11, align 8
+  br label %20
+
+; <label>:12                                      ; preds = %0
+  %13 = fadd double %4, 7.000000e+00
+  %14 = getelementptr inbounds double* %G, i64 2
+  store double %13, double* %14, align 8
+  %15 = getelementptr inbounds double* %G, i64 6
+  %16 = load double* %15, align 8
+  %17 = fmul double %16, 3.000000e+00
+  %18 = fadd double %17, 8.000000e+00
+  %19 = getelementptr inbounds double* %G, i64 3
+  store double %18, double* %19, align 8
+  br label %20
+
+; <label>:20                                      ; preds = %12, %5
+  ret i32 undef
+}
+
author	Nadav Rotem <nrotem@apple.com>	2013-06-23 21:57:27 +0000
committer	Nadav Rotem <nrotem@apple.com>	2013-06-23 21:57:27 +0000
commit	722b0a4d293b16eebaed94ae65d5f11743cbcea5 (patch)
tree	a8c2209653dfad4d2f5d9a5e8c0ad92e1878f454
parent	787ad64b989937e0d79e176b0bf9af4a85a839d0 (diff)
download	external_llvm-722b0a4d293b16eebaed94ae65d5f11743cbcea5.tar.gz external_llvm-722b0a4d293b16eebaed94ae65d5f11743cbcea5.tar.bz2 external_llvm-722b0a4d293b16eebaed94ae65d5f11743cbcea5.zip