Re-implement the main strength-reduction portion of LoopStrengthReduction.

This new version is much more aggressive about doing "full" reduction in cases where it reduces register pressure, and also more aggressive about rewriting induction variables to count down (or up) to zero when doing so reduces register pressure. It currently uses fairly simplistic algorithms for finding reuse opportunities, but it introduces a new framework allows it to combine multiple strategies at once to form hybrid solutions, instead of doing all full-reduction or all base+index. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94061 91177308-0d34-0410-b5e6-96231b3b80d8
author: Dan Gohman <gohman@apple.com> 2010-01-21 02:09:26 +0000
committer: Dan Gohman <gohman@apple.com> 2010-01-21 02:09:26 +0000
commit: d7140f1336529590de9a323ac6feff8391bf6dc8 (patch)
tree: 52a9cf0b867521026963ca127f6a46d19f0f8f78 /lib/Analysis
parent: a6120d5023892e1397a44c36669676ceb2a24907 (diff)
download: external_llvm-d7140f1336529590de9a323ac6feff8391bf6dc8.tar.gz
external_llvm-d7140f1336529590de9a323ac6feff8391bf6dc8.tar.bz2
external_llvm-d7140f1336529590de9a323ac6feff8391bf6dc8.zip
3 files changed, 359 insertions, 81 deletions
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 92f00273a2..38611ccb62 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -324,12 +324,6 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
   // the actual replacement value.
   if (U.isUseOfPostIncrementedValue())
     RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
-  // Evaluate the expression out of the loop, if possible.
-  if (!L->contains(U.getUser())) {
-    const SCEV *ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
-    if (ExitVal->isLoopInvariant(L))
-      RetVal = ExitVal;
-  }
   return RetVal;
 }
 
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 7389007bf2..2f44913abd 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1089,6 +1089,15 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
   if (!isa<SCEVSignExtendExpr>(SExt))
     return SExt;
 
+  // Force the cast to be folded into the operands of an addrec.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+    SmallVector<const SCEV *, 4> Ops;
+    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+         I != E; ++I)
+      Ops.push_back(getAnyExtendExpr(*I, Ty));
+    return getAddRecExpr(Ops, AR->getLoop());
+  }
+
   // If the expression is obviously signed, use the sext cast value.
   if (isa<SCEVSMaxExpr>(Op))
     return SExt;
@@ -1204,6 +1213,17 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
            "SCEVAddExpr operand types don't match!");
 #endif
 
+  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
+  if (!HasNUW && HasNSW) {
+    bool All = true;
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (!isKnownNonNegative(Ops[i])) {
+        All = false;
+        break;
+      }
+    if (All) HasNUW = true;
+  }
+
   // Sort by complexity, this groups all similar expression types together.
   GroupByComplexity(Ops, LI);
 
@@ -1521,10 +1541,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEVAddExpr *S = SCEVAllocator.Allocate<SCEVAddExpr>();
-  new (S) SCEVAddExpr(ID, Ops);
-  UniqueSCEVs.InsertNode(S, IP);
+  SCEVAddExpr *S =
+    static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    S = SCEVAllocator.Allocate<SCEVAddExpr>();
+    new (S) SCEVAddExpr(ID, Ops);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
   if (HasNSW) S->setHasNoSignedWrap(true);
   return S;
@@ -1535,6 +1558,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                         bool HasNUW, bool HasNSW) {
   assert(!Ops.empty() && "Cannot get empty mul!");
+  if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
     assert(getEffectiveSCEVType(Ops[i]->getType()) ==
@@ -1542,6 +1566,17 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
            "SCEVMulExpr operand types don't match!");
 #endif
 
+  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
+  if (!HasNUW && HasNSW) {
+    bool All = true;
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (!isKnownNonNegative(Ops[i])) {
+        All = false;
+        break;
+      }
+    if (All) HasNUW = true;
+  }
+
   // Sort by complexity, this groups all similar expression types together.
   GroupByComplexity(Ops, LI);
 
@@ -1576,6 +1611,22 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
       // If we have a multiply of zero, it will always be zero.
       return Ops[0];
+    } else if (Ops[0]->isAllOnesValue()) {
+      // If we have a mul by -1 of an add, try distributing the -1 among the
+      // add operands.
+      if (Ops.size() == 2)
+        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
+          SmallVector<const SCEV *, 4> NewOps;
+          bool AnyFolded = false;
+          for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+               I != E; ++I) {
+            const SCEV *Mul = getMulExpr(Ops[0], *I);
+            if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
+            NewOps.push_back(Mul);
+          }
+          if (AnyFolded)
+            return getAddExpr(NewOps);
+        }
     }
   }
 
@@ -1642,7 +1693,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
 
       // It's tempting to propagate the NSW flag here, but nsw multiplication
       // is not associative so this isn't necessarily safe.
-      const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
+      const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(),
+                                         HasNUW && AddRec->hasNoUnsignedWrap(),
+                                         /*HasNSW=*/false);
 
       // If all of the other operands were loop invariant, we are done.
       if (Ops.size() == 1) return NewRec;
@@ -1696,10 +1749,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEVMulExpr *S = SCEVAllocator.Allocate<SCEVMulExpr>();
-  new (S) SCEVMulExpr(ID, Ops);
-  UniqueSCEVs.InsertNode(S, IP);
+  SCEVMulExpr *S =
+    static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    S = SCEVAllocator.Allocate<SCEVMulExpr>();
+    new (S) SCEVMulExpr(ID, Ops);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
   if (HasNSW) S->setHasNoSignedWrap(true);
   return S;
@@ -1842,10 +1898,24 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
     return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0}  -->  X
   }
 
+  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
+  if (!HasNUW && HasNSW) {
+    bool All = true;
+    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+      if (!isKnownNonNegative(Operands[i])) {
+        All = false;
+        break;
+      }
+    if (All) HasNUW = true;
+  }
+
   // Canonicalize nested AddRecs in by nesting them in order of loop depth.
   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
     const Loop *NestedLoop = NestedAR->getLoop();
-    if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
+    if (L->contains(NestedLoop->getHeader()) ?
+        (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
+        (!NestedLoop->contains(L->getHeader()) &&
+         DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                   NestedAR->op_end());
       Operands[0] = NestedAR->getStart();
@@ -1884,10 +1954,13 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
     ID.AddPointer(Operands[i]);
   ID.AddPointer(L);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEVAddRecExpr *S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
-  new (S) SCEVAddRecExpr(ID, Operands, L);
-  UniqueSCEVs.InsertNode(S, IP);
+  SCEVAddRecExpr *S =
+    static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
+    new (S) SCEVAddRecExpr(ID, Operands, L);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
   if (HasNSW) S->setHasNoSignedWrap(true);
   return S;
@@ -2525,31 +2598,28 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
             if (Accum->isLoopInvariant(L) ||
                 (isa<SCEVAddRecExpr>(Accum) &&
                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+              bool HasNUW = false;
+              bool HasNSW = false;
+
+              // If the increment doesn't overflow, then neither the addrec nor
+              // the post-increment will overflow.
+              if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+                if (OBO->hasNoUnsignedWrap())
+                  HasNUW = true;
+                if (OBO->hasNoSignedWrap())
+                  HasNSW = true;
+              }
+
               const SCEV *StartVal =
                 getSCEV(PN->getIncomingValue(IncomingEdge));
-              const SCEVAddRecExpr *PHISCEV =
-                cast<SCEVAddRecExpr>(getAddRecExpr(StartVal, Accum, L));
-
-              // If the increment doesn't overflow, then neither the addrec nor the
-              // post-increment will overflow.
-              if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV))
-                if (OBO->getOperand(0) == PN &&
-                    getSCEV(OBO->getOperand(1)) ==
-                      PHISCEV->getStepRecurrence(*this)) {
-                  const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this);
-                  if (OBO->hasNoUnsignedWrap()) {
-                    const_cast<SCEVAddRecExpr *>(PHISCEV)
-                      ->setHasNoUnsignedWrap(true);
-                    const_cast<SCEVAddRecExpr *>(PostInc)
-                      ->setHasNoUnsignedWrap(true);
-                  }
-                  if (OBO->hasNoSignedWrap()) {
-                    const_cast<SCEVAddRecExpr *>(PHISCEV)
-                      ->setHasNoSignedWrap(true);
-                    const_cast<SCEVAddRecExpr *>(PostInc)
-                      ->setHasNoSignedWrap(true);
-                  }
-                }
+              const SCEV *PHISCEV =
+                getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW);
+
+              // Since the no-wrap flags are on the increment, they apply to the
+              // post-incremented value as well.
+              if (Accum->isLoopInvariant(L))
+                (void)getAddRecExpr(getAddExpr(StartVal, Accum),
+                                    Accum, L, HasNUW, HasNSW);
 
               // Okay, for the entire analysis of this edge we assumed the PHI
               // to be symbolic.  We now need to go back and purge all of the
@@ -2781,26 +2851,29 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
     const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
     const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-    if (!Trip) return FullSet;
+    ConstantRange ConservativeResult = FullSet;
+
+    // If there's no unsigned wrap, the value will never be less than its
+    // initial value.
+    if (AddRec->hasNoUnsignedWrap())
+      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+        ConservativeResult =
+          ConstantRange(C->getValue()->getValue(),
+                        APInt(getTypeSizeInBits(C->getType()), 0));
 
     // TODO: non-affine addrec
-    if (AddRec->isAffine()) {
+    if (Trip && AddRec->isAffine()) {
       const Type *Ty = AddRec->getType();
       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
       if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
 
         const SCEV *Start = AddRec->getStart();
-        const SCEV *Step = AddRec->getStepRecurrence(*this);
         const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
 
         // Check for overflow.
-        // TODO: This is very conservative.
-        if (!(Step->isOne() &&
-              isKnownPredicate(ICmpInst::ICMP_ULT, Start, End)) &&
-            !(Step->isAllOnesValue() &&
-              isKnownPredicate(ICmpInst::ICMP_UGT, Start, End)))
-          return FullSet;
+        if (!AddRec->hasNoUnsignedWrap())
+          return ConservativeResult;
 
         ConstantRange StartRange = getUnsignedRange(Start);
         ConstantRange EndRange = getUnsignedRange(End);
@@ -2809,10 +2882,12 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
         APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
                                    EndRange.getUnsignedMax());
         if (Min.isMinValue() && Max.isMaxValue())
-          return FullSet;
+          return ConservativeResult;
         return ConstantRange(Min, Max+1);
       }
     }
+
+    return ConservativeResult;
   }
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -2891,26 +2966,39 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
     const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
     const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-    if (!Trip) return FullSet;
+    ConstantRange ConservativeResult = FullSet;
+
+    // If there's no signed wrap, and all the operands have the same sign or
+    // zero, the value won't ever change sign.
+    if (AddRec->hasNoSignedWrap()) {
+      bool AllNonNeg = true;
+      bool AllNonPos = true;
+      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+        if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+        if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+      }
+      unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+      if (AllNonNeg)
+        ConservativeResult = ConstantRange(APInt(BitWidth, 0),
+                                           APInt::getSignedMinValue(BitWidth));
+      else if (AllNonPos)
+        ConservativeResult = ConstantRange(APInt::getSignedMinValue(BitWidth),
+                                           APInt(BitWidth, 1));
+    }
 
     // TODO: non-affine addrec
-    if (AddRec->isAffine()) {
+    if (Trip && AddRec->isAffine()) {
       const Type *Ty = AddRec->getType();
       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
       if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
 
         const SCEV *Start = AddRec->getStart();
-        const SCEV *Step = AddRec->getStepRecurrence(*this);
         const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
 
         // Check for overflow.
-        // TODO: This is very conservative.
-        if (!(Step->isOne() &&
-              isKnownPredicate(ICmpInst::ICMP_SLT, Start, End)) &&
-            !(Step->isAllOnesValue() &&
-              isKnownPredicate(ICmpInst::ICMP_SGT, Start, End)))
-          return FullSet;
+        if (!AddRec->hasNoSignedWrap())
+          return ConservativeResult;
 
         ConstantRange StartRange = getSignedRange(Start);
         ConstantRange EndRange = getSignedRange(End);
@@ -2919,15 +3007,19 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
         APInt Max = APIntOps::smax(StartRange.getSignedMax(),
                                    EndRange.getSignedMax());
         if (Min.isMinSignedValue() && Max.isMaxSignedValue())
-          return FullSet;
+          return ConservativeResult;
         return ConstantRange(Min, Max+1);
       }
     }
+
+    return ConservativeResult;
   }
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
     unsigned BitWidth = getTypeSizeInBits(U->getType());
+    if (!U->getValue()->getType()->isInteger() && !TD)
+      return FullSet;
     unsigned NS = ComputeNumSignBits(U->getValue(), TD);
     if (NS == 1)
       return FullSet;
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 2c66f1ac2c..b049f424fa 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -81,7 +81,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
 
     Instruction *I = CastInst::Create(Op, V, Ty, V->getName(),
                                       A->getParent()->getEntryBlock().begin());
-    InsertedValues.insert(I);
+    rememberInstruction(I);
     return I;
   }
 
@@ -114,7 +114,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
     IP = II->getNormalDest()->begin();
   while (isa<PHINode>(IP)) ++IP;
   Instruction *CI = CastInst::Create(Op, V, Ty, V->getName(), IP);
-  InsertedValues.insert(CI);
+  rememberInstruction(CI);
   return CI;
 }
 
@@ -144,7 +144,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
 
   // If we haven't found this binop, insert it.
   Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
-  InsertedValues.insert(BO);
+  rememberInstruction(BO);
   return BO;
 }
 
@@ -491,22 +491,39 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
 
     // Emit a GEP.
     Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
-    InsertedValues.insert(GEP);
+    rememberInstruction(GEP);
     return GEP;
   }
 
   // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
   // because ScalarEvolution may have changed the address arithmetic to
   // compute a value which is beyond the end of the allocated object.
-  Value *GEP = Builder.CreateGEP(V,
+  Value *Casted = V;
+  if (V->getType() != PTy)
+    Casted = InsertNoopCastOfTo(Casted, PTy);
+  Value *GEP = Builder.CreateGEP(Casted,
                                  GepIndices.begin(),
                                  GepIndices.end(),
                                  "scevgep");
   Ops.push_back(SE.getUnknown(GEP));
-  InsertedValues.insert(GEP);
+  rememberInstruction(GEP);
   return expand(SE.getAddExpr(Ops));
 }
 
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+static bool isNonConstantNegative(const SCEV *F) {
+  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
+  if (!Mul) return false;
+
+  // If there is a constant factor, it will be first.
+  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+  if (!SC) return false;
+
+  // Return true if the value is negative, this matches things like (-42 * V).
+  return SC->getValue()->getValue().isNegative();
+}
+
 Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
   int NumOperands = S->getNumOperands();
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
@@ -539,8 +556,14 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
   // Emit a bunch of add instructions
   for (int i = NumOperands-1; i >= 0; --i) {
     if (i == PIdx) continue;
-    Value *W = expandCodeFor(S->getOperand(i), Ty);
-    V = InsertBinop(Instruction::Add, V, W);
+    const SCEV *Op = S->getOperand(i);
+    if (isNonConstantNegative(Op)) {
+      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+      V = InsertBinop(Instruction::Sub, V, W);
+    } else {
+      Value *W = expandCodeFor(Op, Ty);
+      V = InsertBinop(Instruction::Add, V, W);
+    }
   }
   return V;
 }
@@ -603,7 +626,175 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
   }
 }
 
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+                                        const Loop *L,
+                                        const Type *ExpandTy,
+                                        const Type *IntTy) {
+  // Reuse a previously-inserted PHI, if present.
+  for (BasicBlock::iterator I = L->getHeader()->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized)
+      return PN;
+
+  // Save the original insertion point so we can restore it when we're done.
+  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+  // Expand code for the start value.
+  Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+                                L->getHeader()->begin());
+
+  // Expand code for the step value. Insert instructions right before the
+  // terminator corresponding to the back-edge. Do this before creating the PHI
+  // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
+  // negative, insert a sub instead of an add for the increment (unless it's a
+  // constant, because subtracts of constants are canonicalized to adds).
+  const SCEV *Step = Normalized->getStepRecurrence(SE);
+  bool isPointer = isa<PointerType>(ExpandTy);
+  bool isNegative = !isPointer && isNonConstantNegative(Step);
+  if (isNegative)
+    Step = SE.getNegativeSCEV(Step);
+  Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+
+  // Create the PHI.
+  Builder.SetInsertPoint(L->getHeader(), L->getHeader()->begin());
+  PHINode *PN = Builder.CreatePHI(ExpandTy, "lsr.iv");
+  rememberInstruction(PN);
+
+  // Create the step instructions and populate the PHI.
+  BasicBlock *Header = L->getHeader();
+  for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
+       HPI != HPE; ++HPI) {
+    BasicBlock *Pred = *HPI;
+
+    // Add a start value.
+    if (!L->contains(Pred)) {
+      PN->addIncoming(StartV, Pred);
+      continue;
+    }
+
+    // Create a step value and add it to the PHI. If IVIncInsertLoop is
+    // non-null and equal to the addrec's loop, insert the instructions
+    // at IVIncInsertPos.
+    Instruction *InsertPos = L == IVIncInsertLoop ?
+      IVIncInsertPos : Pred->getTerminator();
+    Builder.SetInsertPoint(InsertPos->getParent(), InsertPos);
+    Value *IncV;
+    // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+    if (isPointer) {
+      const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+      // If the step isn't constant, don't use an implicitly scaled GEP, because
+      // that would require a multiply inside the loop.
+      if (!isa<ConstantInt>(StepV))
+        GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+                                    GEPPtrTy->getAddressSpace());
+      const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+      IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+      if (IncV->getType() != PN->getType()) {
+        IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp");
+        rememberInstruction(IncV);
+      }
+    } else {
+      IncV = isNegative ?
+        Builder.CreateSub(PN, StepV, "lsr.iv.next") :
+        Builder.CreateAdd(PN, StepV, "lsr.iv.next");
+      rememberInstruction(IncV);
+    }
+    PN->addIncoming(IncV, Pred);
+  }
+
+  // Restore the original insert point.
+  if (SaveInsertBB)
+    Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+
+  // Remember this PHI, even in post-inc mode.
+  InsertedValues.insert(PN);
+
+  return PN;
+}
+
+Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
+  const Type *STy = S->getType();
+  const Type *IntTy = SE.getEffectiveSCEVType(STy);
+  const Loop *L = S->getLoop();
+
+  // Determine a normalized form of this expression, which is the expression
+  // before any post-inc adjustment is made.
+  const SCEVAddRecExpr *Normalized = S;
+  if (L == PostIncLoop) {
+    const SCEV *Step = S->getStepRecurrence(SE);
+    Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step));
+  }
+
+  // Strip off any non-loop-dominating component from the addrec start.
+  const SCEV *Start = Normalized->getStart();
+  const SCEV *PostLoopOffset = 0;
+  if (!Start->properlyDominates(L->getHeader(), SE.DT)) {
+    PostLoopOffset = Start;
+    Start = SE.getIntegerSCEV(0, Normalized->getType());
+    Normalized =
+      cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start,
+                                            Normalized->getStepRecurrence(SE),
+                                            Normalized->getLoop()));
+  }
+
+  // Strip off any non-loop-dominating component from the addrec step.
+  const SCEV *Step = Normalized->getStepRecurrence(SE);
+  const SCEV *PostLoopScale = 0;
+  if (!Step->hasComputableLoopEvolution(L) &&
+      !Step->dominates(L->getHeader(), SE.DT)) {
+    PostLoopScale = Step;
+    Step = SE.getIntegerSCEV(1, Normalized->getType());
+    Normalized =
+      cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
+                                            Normalized->getLoop()));
+  }
+
+  // Expand the core addrec. If we need post-loop scaling, force it to
+  // expand to an integer type to avoid the need for additional casting.
+  const Type *ExpandTy = PostLoopScale ? IntTy : STy;
+  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+
+  // Accomodate post-inc mode, if necessary.
+  Value *Result;
+  if (L != PostIncLoop)
+    Result = PN;
+  else {
+    // In PostInc mode, use the post-incremented value.
+    BasicBlock *LatchBlock = L->getLoopLatch();
+    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
+    Result = PN->getIncomingValueForBlock(LatchBlock);
+  }
+
+  // Re-apply any non-loop-dominating scale.
+  if (PostLoopScale) {
+    Result = Builder.CreateMul(Result,
+                               expandCodeFor(PostLoopScale, IntTy));
+    rememberInstruction(Result);
+  }
+
+  // Re-apply any non-loop-dominating offset.
+  if (PostLoopOffset) {
+    if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+      const SCEV *const OffsetArray[1] = { PostLoopOffset };
+      Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
+    } else {
+      Result = Builder.CreateAdd(Result,
+                                 expandCodeFor(PostLoopOffset, IntTy));
+      rememberInstruction(Result);
+    }
+  }
+
+  return Result;
+}
+
 Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
+  if (!CanonicalMode) return expandAddRecExprLiterally(S);
+
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
   const Loop *L = S->getLoop();
 
@@ -681,7 +872,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     // specified loop.
     BasicBlock *Header = L->getHeader();
     PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin());
-    InsertedValues.insert(PN);
+    rememberInstruction(PN);
 
     Constant *One = ConstantInt::get(Ty, 1);
     for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
@@ -691,7 +882,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
         // corresponding to the back-edge.
         Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
                                                      (*HPI)->getTerminator());
-        InsertedValues.insert(Add);
+        rememberInstruction(Add);
         PN->addIncoming(Add, *HPI);
       } else {
         PN->addIncoming(Constant::getNullValue(Ty), *HPI);
@@ -738,7 +929,7 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
   Value *V = expandCodeFor(S->getOperand(),
                            SE.getEffectiveSCEVType(S->getOperand()->getType()));
   Value *I = Builder.CreateTrunc(V, Ty, "tmp");
-  InsertedValues.insert(I);
+  rememberInstruction(I);
   return I;
 }
 
@@ -747,7 +938,7 @@ Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
   Value *V = expandCodeFor(S->getOperand(),
                            SE.getEffectiveSCEVType(S->getOperand()->getType()));
   Value *I = Builder.CreateZExt(V, Ty, "tmp");
-  InsertedValues.insert(I);
+  rememberInstruction(I);
   return I;
 }
 
@@ -756,7 +947,7 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
   Value *V = expandCodeFor(S->getOperand(),
                            SE.getEffectiveSCEVType(S->getOperand()->getType()));
   Value *I = Builder.CreateSExt(V, Ty, "tmp");
-  InsertedValues.insert(I);
+  rememberInstruction(I);
   return I;
 }
 
@@ -772,9 +963,9 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
     }
     Value *RHS = expandCodeFor(S->getOperand(i), Ty);
     Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
-    InsertedValues.insert(ICmp);
+    rememberInstruction(ICmp);
     Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
-    InsertedValues.insert(Sel);
+    rememberInstruction(Sel);
     LHS = Sel;
   }
   // In the case of mixed integer and pointer types, cast the
@@ -796,9 +987,9 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
     }
     Value *RHS = expandCodeFor(S->getOperand(i), Ty);
     Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
-    InsertedValues.insert(ICmp);
+    rememberInstruction(ICmp);
     Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
-    InsertedValues.insert(Sel);
+    rememberInstruction(Sel);
     LHS = Sel;
   }
   // In the case of mixed integer and pointer types, cast the
@@ -863,7 +1054,8 @@ Value *SCEVExpander::expand(const SCEV *S) {
   Value *V = visit(S);
 
   // Remember the expanded value for this SCEV at this location.
-  InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+  if (!PostIncLoop)
+    InsertedExpressions[std::make_pair(S, InsertPt)] = V;
 
   Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
   return V;
author	Dan Gohman <gohman@apple.com>	2010-01-21 02:09:26 +0000
committer	Dan Gohman <gohman@apple.com>	2010-01-21 02:09:26 +0000
commit	d7140f1336529590de9a323ac6feff8391bf6dc8 (patch)
tree	52a9cf0b867521026963ca127f6a46d19f0f8f78 /lib/Analysis
parent	a6120d5023892e1397a44c36669676ceb2a24907 (diff)
download	external_llvm-d7140f1336529590de9a323ac6feff8391bf6dc8.tar.gz external_llvm-d7140f1336529590de9a323ac6feff8391bf6dc8.tar.bz2 external_llvm-d7140f1336529590de9a323ac6feff8391bf6dc8.zip