diff options
author | Nadav Rotem <nrotem@apple.com> | 2013-06-23 21:57:27 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2013-06-23 21:57:27 +0000 |
commit | 722b0a4d293b16eebaed94ae65d5f11743cbcea5 (patch) | |
tree | a8c2209653dfad4d2f5d9a5e8c0ad92e1878f454 | |
parent | 787ad64b989937e0d79e176b0bf9af4a85a839d0 (diff) | |
download | external_llvm-722b0a4d293b16eebaed94ae65d5f11743cbcea5.tar.gz external_llvm-722b0a4d293b16eebaed94ae65d5f11743cbcea5.tar.bz2 external_llvm-722b0a4d293b16eebaed94ae65d5f11743cbcea5.zip |
SLP Vectorizer: Fix a bug in the code that does CSE on the generated gather sequences.
Make sure that we don't replace one gather sequence with another (via RAUW) unless the replacement dominates the sequence being replaced.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184674 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 14 | ||||
-rw-r--r-- | test/Transforms/SLPVectorizer/X86/cse.ll | 51 |
2 files changed, 61 insertions, 4 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 838cb9599c..5bc3d852e7 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -127,8 +127,9 @@ public: static const int MAX_COST = INT_MIN; FuncSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl, - TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li) : - F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), + TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li, + DominatorTree *Dt) : + F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt), Builder(Se->getContext()) { for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) { BasicBlock *BB = it; @@ -255,6 +256,7 @@ public: TargetTransformInfo *TTI; AliasAnalysis *AA; LoopInfo *LI; + DominatorTree *DT; /// Instruction builder to construct the vectorized tree. IRBuilder<> Builder; }; @@ -1197,7 +1199,8 @@ void FuncSLP::optimizeGatherSequence() { // visited instructions. for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(), ve = Visited.end(); v != ve; ++v) { - if (Insert->isIdenticalTo(*v)) { + if (Insert->isIdenticalTo(*v) && + DT->dominates((*v)->getParent(), Insert->getParent())) { Insert->replaceAllUsesWith(*v); break; } @@ -1224,6 +1227,7 @@ struct SLPVectorizer : public FunctionPass { TargetTransformInfo *TTI; AliasAnalysis *AA; LoopInfo *LI; + DominatorTree *DT; virtual bool runOnFunction(Function &F) { SE = &getAnalysis<ScalarEvolution>(); @@ -1231,6 +1235,7 @@ struct SLPVectorizer : public FunctionPass { TTI = &getAnalysis<TargetTransformInfo>(); AA = &getAnalysis<AliasAnalysis>(); LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTree>(); StoreRefs.clear(); bool Changed = false; @@ -1244,7 +1249,7 @@ struct SLPVectorizer : public FunctionPass { // Use the bollom up slp vectorizer to construct chains that start with // he store instructions. 
- FuncSLP R(&F, SE, DL, TTI, AA, LI); + FuncSLP R(&F, SE, DL, TTI, AA, LI, DT); for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) { BasicBlock *BB = it; @@ -1274,6 +1279,7 @@ struct SLPVectorizer : public FunctionPass { AU.addRequired<AliasAnalysis>(); AU.addRequired<TargetTransformInfo>(); AU.addRequired<LoopInfo>(); + AU.addRequired<DominatorTree>(); } private: diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll index 6321b00768..d286798d70 100644 --- a/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/test/Transforms/SLPVectorizer/X86/cse.ll @@ -83,3 +83,54 @@ entry: ret i32 undef } +; int test2(double *G, int k) { +; if (k) { +; G[0] = 1+G[5]*4; +; G[1] = 6+G[6]*3; +; } else { +; G[2] = 7+G[5]*4; +; G[3] = 8+G[6]*3; +; } +; } + +; We can't merge the gather sequences because one does not dominate the other. +; CHECK: test2 +; CHECK: insertelement +; CHECK: insertelement +; CHECK: insertelement +; CHECK: insertelement +; CHECK: ret +define i32 @test2(double* nocapture %G, i32 %k) { + %1 = icmp eq i32 %k, 0 + %2 = getelementptr inbounds double* %G, i64 5 + %3 = load double* %2, align 8 + %4 = fmul double %3, 4.000000e+00 + br i1 %1, label %12, label %5 + +; <label>:5 ; preds = %0 + %6 = fadd double %4, 1.000000e+00 + store double %6, double* %G, align 8 + %7 = getelementptr inbounds double* %G, i64 6 + %8 = load double* %7, align 8 + %9 = fmul double %8, 3.000000e+00 + %10 = fadd double %9, 6.000000e+00 + %11 = getelementptr inbounds double* %G, i64 1 + store double %10, double* %11, align 8 + br label %20 + +; <label>:12 ; preds = %0 + %13 = fadd double %4, 7.000000e+00 + %14 = getelementptr inbounds double* %G, i64 2 + store double %13, double* %14, align 8 + %15 = getelementptr inbounds double* %G, i64 6 + %16 = load double* %15, align 8 + %17 = fmul double %16, 3.000000e+00 + %18 = fadd double %17, 8.000000e+00 + %19 = getelementptr inbounds double* %G, i64 3 + store double %18, double* %19, align 8 + 
br label %20 + +; <label>:20 ; preds = %12, %5 + ret i32 undef +} + |