Merge with LLVM upstream 2011/10/20 (r142530)

Conflicts: lib/Support/Unix/Host.inc Change-Id: Idc00db3b63912dca6348bddd9f8a1af2a8d5d147
author: Logan Chien <loganchien@google.com> 2011-10-20 00:08:13 +0800
committer: Logan Chien <loganchien@google.com> 2011-10-20 00:09:35 +0800
commit: 0ebc07a576037e4e36f68bf5cece32740ca120c0 (patch)
tree: c2e40648043d01498ee25af839a071193561e425 /lib/Analysis
parent: 62383e889e0b06fd12a6b88311717cd33a1925c4 (diff)
parent: cdd8e46bec4e975d00a5abea808d8eb4138515c5 (diff)
download: external_llvm-0ebc07a576037e4e36f68bf5cece32740ca120c0.tar.gz
external_llvm-0ebc07a576037e4e36f68bf5cece32740ca120c0.tar.bz2
external_llvm-0ebc07a576037e4e36f68bf5cece32740ca120c0.zip
29 files changed, 2312 insertions, 928 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index bfa02e0e1f..bd132c05c3 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -237,6 +237,19 @@ AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
                   VI->getMetadata(LLVMContext::MD_tbaa));
 }
 
+AliasAnalysis::Location
+AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) {
+  return Location(CXI->getPointerOperand(),
+                  getTypeStoreSize(CXI->getCompareOperand()->getType()),
+                  CXI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) {
+  return Location(RMWI->getPointerOperand(),
+                  getTypeStoreSize(RMWI->getValOperand()->getType()),
+                  RMWI->getMetadata(LLVMContext::MD_tbaa));
+}
 
 AliasAnalysis::Location 
 AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
@@ -268,8 +281,8 @@ AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
 
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
-  // Be conservative in the face of volatile.
-  if (L->isVolatile())
+  // Be conservative in the face of volatile/atomic.
+  if (!L->isUnordered())
     return ModRef;
 
   // If the load address doesn't alias the given address, it doesn't read
@@ -283,8 +296,8 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
 
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
-  // Be conservative in the face of volatile.
-  if (S->isVolatile())
+  // Be conservative in the face of volatile/atomic.
+  if (!S->isUnordered())
     return ModRef;
 
   // If the store address cannot alias the pointer in question, then the
@@ -317,6 +330,33 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
   return ModRef;
 }
 
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
+  // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
+  if (CX->getOrdering() > Monotonic)
+    return ModRef;
+
+  // If the cmpxchg address does not alias the location, it does not access it.
+  if (!alias(getLocation(CX), Loc))
+    return NoModRef;
+
+  return ModRef;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
+  // Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
+  if (RMW->getOrdering() > Monotonic)
+    return ModRef;
+
+  // If the atomicrmw address does not alias the location, it does not access it.
+  if (!alias(getLocation(RMW), Loc))
+    return NoModRef;
+
+  return ModRef;
+}
+
+
 // AliasAnalysis destructor: DO NOT move this to the header file for
 // AliasAnalysis or else clients of the AliasAnalysis class may not depend on
 // the AliasAnalysis.o file in the current .a file, causing alias analysis
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 2ed6949412..3fcd3b55de 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -56,12 +56,12 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
       AliasTy = MayAlias;
   }
 
-  if (CallSites.empty()) {            // Merge call sites...
-    if (!AS.CallSites.empty())
-      std::swap(CallSites, AS.CallSites);
-  } else if (!AS.CallSites.empty()) {
-    CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end());
-    AS.CallSites.clear();
+  if (UnknownInsts.empty()) {            // Merge call sites...
+    if (!AS.UnknownInsts.empty())
+      std::swap(UnknownInsts, AS.UnknownInsts);
+  } else if (!AS.UnknownInsts.empty()) {
+    UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
+    AS.UnknownInsts.clear();
   }
 
   AS.Forward = this;  // Forward across AS now...
@@ -123,13 +123,10 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
   addRef();               // Entry points to alias set.
 }
 
-void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
-  CallSites.push_back(CS.getInstruction());
+void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
+  UnknownInsts.push_back(I);
 
-  AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
-  if (Behavior == AliasAnalysis::DoesNotAccessMemory)
-    return;
-  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+  if (!I->mayWriteToMemory()) {
     AliasTy = MayAlias;
     AccessTy |= Refs;
     return;
@@ -147,7 +144,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
                               const MDNode *TBAAInfo,
                               AliasAnalysis &AA) const {
   if (AliasTy == MustAlias) {
-    assert(CallSites.empty() && "Illegal must alias set!");
+    assert(UnknownInsts.empty() && "Illegal must alias set!");
 
     // If this is a set of MustAliases, only check to see if the pointer aliases
     // SOME value in the set.
@@ -167,10 +164,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
                                          I.getTBAAInfo())))
       return true;
 
-  // Check the call sites list and invoke list...
-  if (!CallSites.empty()) {
-    for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
-      if (AA.getModRefInfo(CallSites[i],
+  // Check the unknown instructions...
+  if (!UnknownInsts.empty()) {
+    for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
+      if (AA.getModRefInfo(UnknownInsts[i],
                            AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
             AliasAnalysis::NoModRef)
         return true;
@@ -179,18 +176,20 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
   return false;
 }
 
-bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
-  if (AA.doesNotAccessMemory(CS))
+bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
+  if (!Inst->mayReadOrWriteMemory())
     return false;
 
-  for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
-    if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef ||
-        AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef)
+  for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
+    CallSite C1 = getUnknownInst(i), C2 = Inst;
+    if (!C1 || !C2 ||
+        AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
+        AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
       return true;
   }
 
   for (iterator I = begin(), E = end(); I != E; ++I)
-    if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) !=
+    if (AA.getModRefInfo(Inst, I.getPointer(), I.getSize()) !=
            AliasAnalysis::NoModRef)
       return true;
 
@@ -244,10 +243,10 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
 
 
 
-AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
+AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
   AliasSet *FoundSet = 0;
   for (iterator I = begin(), E = end(); I != E; ++I) {
-    if (I->Forward || !I->aliasesCallSite(CS, AA))
+    if (I->Forward || !I->aliasesUnknownInst(Inst, AA))
       continue;
     
     if (FoundSet == 0)        // If this is the first alias set ptr can go into.
@@ -296,22 +295,28 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
 
 
 bool AliasSetTracker::add(LoadInst *LI) {
+  if (LI->getOrdering() > Monotonic) return addUnknown(LI);
+  AliasSet::AccessType ATy = AliasSet::Refs;
+  if (!LI->isUnordered()) ATy = AliasSet::ModRef;
   bool NewPtr;
   AliasSet &AS = addPointer(LI->getOperand(0),
                             AA.getTypeStoreSize(LI->getType()),
                             LI->getMetadata(LLVMContext::MD_tbaa),
-                            AliasSet::Refs, NewPtr);
+                            ATy, NewPtr);
   if (LI->isVolatile()) AS.setVolatile();
   return NewPtr;
 }
 
 bool AliasSetTracker::add(StoreInst *SI) {
+  if (SI->getOrdering() > Monotonic) return addUnknown(SI);
+  AliasSet::AccessType ATy = AliasSet::Mods;
+  if (!SI->isUnordered()) ATy = AliasSet::ModRef;
   bool NewPtr;
   Value *Val = SI->getOperand(0);
   AliasSet &AS = addPointer(SI->getOperand(1),
                             AA.getTypeStoreSize(Val->getType()),
                             SI->getMetadata(LLVMContext::MD_tbaa),
-                            AliasSet::Mods, NewPtr);
+                            ATy, NewPtr);
   if (SI->isVolatile()) AS.setVolatile();
   return NewPtr;
 }
@@ -325,20 +330,20 @@ bool AliasSetTracker::add(VAArgInst *VAAI) {
 }
 
 
-bool AliasSetTracker::add(CallSite CS) {
-  if (isa<DbgInfoIntrinsic>(CS.getInstruction())) 
+bool AliasSetTracker::addUnknown(Instruction *Inst) {
+  if (isa<DbgInfoIntrinsic>(Inst)) 
     return true; // Ignore DbgInfo Intrinsics.
-  if (AA.doesNotAccessMemory(CS))
+  if (!Inst->mayReadOrWriteMemory())
     return true; // doesn't alias anything
 
-  AliasSet *AS = findAliasSetForCallSite(CS);
+  AliasSet *AS = findAliasSetForUnknownInst(Inst);
   if (AS) {
-    AS->addCallSite(CS, AA);
+    AS->addUnknownInst(Inst, AA);
     return false;
   }
   AliasSets.push_back(new AliasSet());
   AS = &AliasSets.back();
-  AS->addCallSite(CS, AA);
+  AS->addUnknownInst(Inst, AA);
   return true;
 }
 
@@ -348,13 +353,9 @@ bool AliasSetTracker::add(Instruction *I) {
     return add(LI);
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return add(SI);
-  if (CallInst *CI = dyn_cast<CallInst>(I))
-    return add(CI);
-  if (InvokeInst *II = dyn_cast<InvokeInst>(I))
-    return add(II);
   if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
     return add(VAAI);
-  return true;
+  return addUnknown(I);
 }
 
 void AliasSetTracker::add(BasicBlock &BB) {
@@ -375,8 +376,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
     AliasSet &AS = const_cast<AliasSet&>(*I);
 
     // If there are any call sites in the alias set, add them to this AST.
-    for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
-      add(AS.CallSites[i]);
+    for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i)
+      add(AS.UnknownInsts[i]);
 
     // Loop over all of the pointers in this alias set.
     bool X;
@@ -393,7 +394,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
 /// tracker.
 void AliasSetTracker::remove(AliasSet &AS) {
   // Drop all call sites.
-  AS.CallSites.clear();
+  AS.UnknownInsts.clear();
   
   // Clear the alias set.
   unsigned NumRefs = 0;
@@ -453,11 +454,11 @@ bool AliasSetTracker::remove(VAArgInst *VAAI) {
   return true;
 }
 
-bool AliasSetTracker::remove(CallSite CS) {
-  if (AA.doesNotAccessMemory(CS))
+bool AliasSetTracker::removeUnknown(Instruction *I) {
+  if (!I->mayReadOrWriteMemory())
     return false; // doesn't alias anything
 
-  AliasSet *AS = findAliasSetForCallSite(CS);
+  AliasSet *AS = findAliasSetForUnknownInst(I);
   if (!AS) return false;
   remove(*AS);
   return true;
@@ -469,11 +470,9 @@ bool AliasSetTracker::remove(Instruction *I) {
     return remove(LI);
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return remove(SI);
-  if (CallInst *CI = dyn_cast<CallInst>(I))
-    return remove(CI);
   if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
     return remove(VAAI);
-  return true;
+  return removeUnknown(I);
 }
 
 
@@ -488,13 +487,13 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
 
   // If this is a call instruction, remove the callsite from the appropriate
   // AliasSet (if present).
-  if (CallSite CS = PtrVal) {
-    if (!AA.doesNotAccessMemory(CS)) {
+  if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
+    if (Inst->mayReadOrWriteMemory()) {
       // Scan all the alias sets to see if this call site is contained.
       for (iterator I = begin(), E = end(); I != E; ++I) {
         if (I->Forward) continue;
         
-        I->removeCallSite(CS);
+        I->removeUnknownInst(Inst);
       }
     }
   }
@@ -571,11 +570,11 @@ void AliasSet::print(raw_ostream &OS) const {
       OS << ", " << I.getSize() << ")";
     }
   }
-  if (!CallSites.empty()) {
-    OS << "\n    " << CallSites.size() << " Call Sites: ";
-    for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+  if (!UnknownInsts.empty()) {
+    OS << "\n    " << UnknownInsts.size() << " Unknown instructions: ";
+    for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
       if (i) OS << ", ";
-      WriteAsOperand(OS, CallSites[i]);
+      WriteAsOperand(OS, UnknownInsts[i]);
     }
   }
   OS << "\n";
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 71e0a83269..0ba6af93b5 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/Analysis.h"
+#include "llvm-c/Initialization.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Analysis/Verifier.h"
 #include <cstring>
@@ -23,7 +24,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeAliasSetPrinterPass(Registry);
   initializeNoAAPass(Registry);
   initializeBasicAliasAnalysisPass(Registry);
-  initializeBlockFrequencyPass(Registry);
+  initializeBlockFrequencyInfoPass(Registry);
   initializeBranchProbabilityInfoPass(Registry);
   initializeCFGViewerPass(Registry);
   initializeCFGPrinterPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 116076ce2a..af400ba7e7 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -374,7 +375,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       }
       
       if (Scale) {
-        VariableGEPIndex Entry = {Index, Extension, Scale};
+        VariableGEPIndex Entry = {Index, Extension,
+                                  static_cast<int64_t>(Scale)};
         VarIndices.push_back(Entry);
       }
     }
@@ -467,6 +469,7 @@ namespace {
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetLibraryInfo>();
     }
 
     virtual AliasResult alias(const Location &LocA,
@@ -549,10 +552,15 @@ namespace {
 
 // Register this pass...
 char BasicAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+                   "Basic Alias Analysis (stateless AA impl)",
+                   false, true, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
                    "Basic Alias Analysis (stateless AA impl)",
                    false, true, false)
 
+
 ImmutablePass *llvm::createBasicAliasAnalysisPass() {
   return new BasicAliasAnalysis();
 }
@@ -706,7 +714,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       // is impossible to alias the pointer we're checking.  If not, we have to
       // assume that the call could touch the pointer, even though it doesn't
       // escape.
-      if (!isNoAlias(Location(cast<Value>(CI)), Loc)) {
+      if (!isNoAlias(Location(*CI), Location(Object))) {
         PassedAsArg = true;
         break;
       }
@@ -716,6 +724,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       return NoModRef;
   }
 
+  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
   ModRefResult Min = ModRef;
 
   // Finally, handle specific knowledge of intrinsics.
@@ -754,26 +763,6 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       // We know that memset doesn't load anything.
       Min = Mod;
       break;
-    case Intrinsic::atomic_cmp_swap:
-    case Intrinsic::atomic_swap:
-    case Intrinsic::atomic_load_add:
-    case Intrinsic::atomic_load_sub:
-    case Intrinsic::atomic_load_and:
-    case Intrinsic::atomic_load_nand:
-    case Intrinsic::atomic_load_or:
-    case Intrinsic::atomic_load_xor:
-    case Intrinsic::atomic_load_max:
-    case Intrinsic::atomic_load_min:
-    case Intrinsic::atomic_load_umax:
-    case Intrinsic::atomic_load_umin:
-      if (TD) {
-        Value *Op1 = II->getArgOperand(0);
-        uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType());
-        MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa);
-        if (isNoAlias(Location(Op1, Op1Size, Tag), Loc))
-          return NoModRef;
-      }
-      break;
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start: {
@@ -818,6 +807,39 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
     }
     }
 
+  // We can bound the aliasing properties of memset_pattern16 just as we can
+  // for memcpy/memset.  This is particularly important because the 
+  // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
+  // whenever possible.
+  else if (TLI.has(LibFunc::memset_pattern16) &&
+           CS.getCalledFunction() &&
+           CS.getCalledFunction()->getName() == "memset_pattern16") {
+    const Function *MS = CS.getCalledFunction();
+    FunctionType *MemsetType = MS->getFunctionType();
+    if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
+        isa<PointerType>(MemsetType->getParamType(0)) &&
+        isa<PointerType>(MemsetType->getParamType(1)) &&
+        isa<IntegerType>(MemsetType->getParamType(2))) {
+      uint64_t Len = UnknownSize;
+      if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
+        Len = LenCI->getZExtValue();
+      const Value *Dest = CS.getArgument(0);
+      const Value *Src = CS.getArgument(1);
+      // If it can't overlap the source dest, then it doesn't modref the loc.
+      if (isNoAlias(Location(Dest, Len), Loc)) {
+        // Always reads 16 bytes of the source.
+        if (isNoAlias(Location(Src, 16), Loc))
+          return NoModRef;
+        // If it can't overlap the dest, then worst case it reads the loc.
+        Min = Ref;
+      // Always reads 16 bytes of the source.
+      } else if (isNoAlias(Location(Src, 16), Loc)) {
+        // If it can't overlap the source, then worst case it mutates the loc.
+        Min = Mod;
+      }
+    }
+  }
+
   // The AliasAnalysis base class has some smarts, lets use them.
   return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
 }
@@ -913,43 +935,43 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
   if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
     return MustAlias;
 
-  // If there is a difference between the pointers, but the difference is
-  // less than the size of the associated memory object, then we know
-  // that the objects are partially overlapping.
+  // If there is a constant difference between the pointers, but the difference
+  // is less than the size of the associated memory object, then we know
+  // that the objects are partially overlapping.  If the difference is
+  // greater, we know they do not overlap.
   if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
-    if (GEP1BaseOffset >= 0 ?
-        (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) :
-        (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size &&
-         GEP1BaseOffset != INT64_MIN))
-      return PartialAlias;
+    if (GEP1BaseOffset >= 0) {
+      if (V2Size != UnknownSize) {
+        if ((uint64_t)GEP1BaseOffset < V2Size)
+          return PartialAlias;
+        return NoAlias;
+      }
+    } else {
+      if (V1Size != UnknownSize) {
+        if (-(uint64_t)GEP1BaseOffset < V1Size)
+          return PartialAlias;
+        return NoAlias;
+      }
+    }
   }
 
-  // If we have a known constant offset, see if this offset is larger than the
-  // access size being queried.  If so, and if no variable indices can remove
-  // pieces of this constant, then we know we have a no-alias.  For example,
-  //   &A[100] != &A.
-  
-  // In order to handle cases like &A[100][i] where i is an out of range
-  // subscript, we have to ignore all constant offset pieces that are a multiple
-  // of a scaled index.  Do this by removing constant offsets that are a
-  // multiple of any of our variable indices.  This allows us to transform
-  // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1
-  // provides an offset of 4 bytes (assuming a <= 4 byte access).
-  for (unsigned i = 0, e = GEP1VariableIndices.size();
-       i != e && GEP1BaseOffset;++i)
-    if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale)
-      GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale;
-  
-  // If our known offset is bigger than the access size, we know we don't have
-  // an alias.
-  if (GEP1BaseOffset) {
-    if (GEP1BaseOffset >= 0 ?
-        (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) :
-        (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size &&
-         GEP1BaseOffset != INT64_MIN))
+  // Try to distinguish something like &A[i][1] against &A[42][0].
+  // Grab the least significant bit set in any of the scales.
+  if (!GEP1VariableIndices.empty()) {
+    uint64_t Modulo = 0;
+    for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
+      Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+    Modulo = Modulo ^ (Modulo & (Modulo - 1));
+
+    // We can compute the difference between the two addresses
+    // mod Modulo. Check whether that difference guarantees that the
+    // two locations do not alias.
+    uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
+    if (V1Size != UnknownSize && V2Size != UnknownSize &&
+        ModOffset >= V2Size && V1Size <= Modulo - ModOffset)
       return NoAlias;
   }
-  
+
   // Statically, we can see that the base objects are the same, but the
   // pointers have dynamic offsets which we can't resolve. And none of our
   // little tricks above worked.
diff --git a/lib/Analysis/BlockFrequency.cpp b/lib/Analysis/BlockFrequency.cpp
deleted file mode 100644
index 4b86d1db1f..0000000000
--- a/lib/Analysis/BlockFrequency.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Loops should be simplified before this analysis.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
-#include "llvm/Analysis/BlockFrequency.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-
-using namespace llvm;
-
-INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis",
-                      true, true)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis",
-                    true, true)
-
-char BlockFrequency::ID = 0;
-
-
-BlockFrequency::BlockFrequency() : FunctionPass(ID) {
-  initializeBlockFrequencyPass(*PassRegistry::getPassRegistry());
-  BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
-}
-
-BlockFrequency::~BlockFrequency() {
-  delete BFI;
-}
-
-void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<BranchProbabilityInfo>();
-  AU.setPreservesAll();
-}
-
-bool BlockFrequency::runOnFunction(Function &F) {
-  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
-  BFI->doFunction(&F, &BPI);
-  return false;
-}
-
-/// getblockFreq - Return block frequency. Never return 0, value must be
-/// positive. Please note that initial frequency is equal to 1024. It means that
-/// we should not rely on the value itself, but only on the comparison to the
-/// other block frequencies. We do this to avoid using of floating points.
-///
-uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) {
-  return BFI->getBlockFreq(BB);
-}
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
new file mode 100644
index 0000000000..d16665fa55
--- /dev/null
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -0,0 +1,63 @@
+//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
+                      true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
+                    true, true)
+
+char BlockFrequencyInfo::ID = 0;
+
+
+BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) {
+  initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+  BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
+}
+
+BlockFrequencyInfo::~BlockFrequencyInfo() {
+  delete BFI;
+}
+
+void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<BranchProbabilityInfo>();
+  AU.setPreservesAll();
+}
+
+bool BlockFrequencyInfo::runOnFunction(Function &F) {
+  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+  BFI->doFunction(&F, &BPI);
+  return false;
+}
+
+void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
+  if (BFI) BFI->print(O);
+}
+
+/// getblockFreq - Return block frequency. Return 0 if we don't have the
+/// information. Please note that initial frequency is equal to 1024. It means
+/// that we should not rely on the value itself, but only on the comparison to
+/// the other block frequencies. We do this to avoid using of floating points.
+///
+BlockFrequency BlockFrequencyInfo::getBlockFreq(BasicBlock *BB) const {
+  return BFI->getBlockFreq(BB);
+}
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index e39cd221b5..52090c9fc1 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -11,7 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Constants.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Support/Debug.h"
@@ -33,9 +36,7 @@ namespace {
 // private methods are hidden in the .cpp file.
 class BranchProbabilityAnalysis {
 
-  typedef std::pair<BasicBlock *, BasicBlock *> Edge;
-
-  DenseMap<Edge, uint32_t> *Weights;
+  typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
 
   BranchProbabilityInfo *BP;
 
@@ -52,7 +53,7 @@ class BranchProbabilityAnalysis {
   //          V
   //         BB1<-+
   //          |   |
-  //          |   | (Weight = 128)
+  //          |   | (Weight = 124)
   //          V   |
   //         BB2--+
   //          |
@@ -60,12 +61,21 @@ class BranchProbabilityAnalysis {
   //          V
   //         BB3
   //
-  // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696..
-  // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303..
+  // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
+  // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
 
-  static const uint32_t LBH_TAKEN_WEIGHT = 128;
+  static const uint32_t LBH_TAKEN_WEIGHT = 124;
   static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
 
+  static const uint32_t RH_TAKEN_WEIGHT = 24;
+  static const uint32_t RH_NONTAKEN_WEIGHT = 8;
+
+  static const uint32_t PH_TAKEN_WEIGHT = 20;
+  static const uint32_t PH_NONTAKEN_WEIGHT = 12;
+
+  static const uint32_t ZH_TAKEN_WEIGHT = 20;
+  static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
+
   // Standard weight value. Used when none of the heuristics set weight for
   // the edge.
   static const uint32_t NORMAL_WEIGHT = 16;
@@ -100,82 +110,128 @@ class BranchProbabilityAnalysis {
     return false;
   }
 
-  // Multiply Edge Weight by two.
-  void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
-    uint32_t Weight = BP->getEdgeWeight(Src, Dst);
-    uint32_t MaxWeight = getMaxWeightFor(Src);
-
-    if (Weight * 2 > MaxWeight)
-      BP->setEdgeWeight(Src, Dst, MaxWeight);
-    else
-      BP->setEdgeWeight(Src, Dst, Weight * 2);
-  }
-
-  // Divide Edge Weight by two.
-  void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) {
-    uint32_t Weight = BP->getEdgeWeight(Src, Dst);
-
-    assert(Weight > 0);
-    if (Weight / 2 < MIN_WEIGHT)
-      BP->setEdgeWeight(Src, Dst, MIN_WEIGHT);
-    else
-      BP->setEdgeWeight(Src, Dst, Weight / 2);
-  }
-
-
   uint32_t getMaxWeightFor(BasicBlock *BB) const {
     return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
   }
 
 public:
-  BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W,
-                            BranchProbabilityInfo *BP, LoopInfo *LI)
-    : Weights(W), BP(BP), LI(LI) {
+  BranchProbabilityAnalysis(BranchProbabilityInfo *BP, LoopInfo *LI)
+    : BP(BP), LI(LI) {
   }
 
+  // Metadata Weights
+  bool calcMetadataWeights(BasicBlock *BB);
+
   // Return Heuristics
-  void calcReturnHeuristics(BasicBlock *BB);
+  bool calcReturnHeuristics(BasicBlock *BB);
 
   // Pointer Heuristics
-  void calcPointerHeuristics(BasicBlock *BB);
+  bool calcPointerHeuristics(BasicBlock *BB);
 
   // Loop Branch Heuristics
-  void calcLoopBranchHeuristics(BasicBlock *BB);
+  bool calcLoopBranchHeuristics(BasicBlock *BB);
+
+  // Zero Heurestics
+  bool calcZeroHeuristics(BasicBlock *BB);
 
   bool runOnFunction(Function &F);
 };
 } // end anonymous namespace
 
+// Propagate existing explicit probabilities from either profile data or
+// 'expect' intrinsic processing.
+bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) {
+  TerminatorInst *TI = BB->getTerminator();
+  if (TI->getNumSuccessors() == 1)
+    return false;
+  if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+    return false;
+
+  MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
+  if (!WeightsNode)
+    return false;
+
+  // Ensure there are weights for all of the successors. Note that the first
+  // operand to the metadata node is a name, not a weight.
+  if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
+    return false;
+
+  // Build up the final weights that will be used in a temporary buffer, but
+  // don't add them until all weihts are present. Each weight value is clamped
+  // to [1, getMaxWeightFor(BB)].
+  uint32_t WeightLimit = getMaxWeightFor(BB);
+  SmallVector<uint32_t, 2> Weights;
+  Weights.reserve(TI->getNumSuccessors());
+  for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
+    ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i));
+    if (!Weight)
+      return false;
+    Weights.push_back(
+      std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit)));
+  }
+  assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
+  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+    BP->setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]);
+
+  return true;
+}
+
 // Calculate Edge Weights using "Return Heuristics". Predict a successor which
 // leads directly to Return Instruction will not be taken.
-void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
+bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
   if (BB->getTerminator()->getNumSuccessors() == 1)
-    return;
+    return false;
+
+  SmallPtrSet<BasicBlock *, 4> ReturningEdges;
+  SmallPtrSet<BasicBlock *, 4> StayEdges;
 
   for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
     BasicBlock *Succ = *I;
-    if (isReturningBlock(Succ)) {
-      decEdgeWeight(BB, Succ);
+    if (isReturningBlock(Succ))
+      ReturningEdges.insert(Succ);
+    else
+      StayEdges.insert(Succ);
+  }
+
+  if (uint32_t numStayEdges = StayEdges.size()) {
+    uint32_t stayWeight = RH_TAKEN_WEIGHT / numStayEdges;
+    if (stayWeight < NORMAL_WEIGHT)
+      stayWeight = NORMAL_WEIGHT;
+
+    for (SmallPtrSet<BasicBlock *, 4>::iterator I = StayEdges.begin(),
+         E = StayEdges.end(); I != E; ++I)
+      BP->setEdgeWeight(BB, *I, stayWeight);
+  }
+
+  if (uint32_t numRetEdges = ReturningEdges.size()) {
+    uint32_t retWeight = RH_NONTAKEN_WEIGHT / numRetEdges;
+    if (retWeight < MIN_WEIGHT)
+      retWeight = MIN_WEIGHT;
+    for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReturningEdges.begin(),
+         E = ReturningEdges.end(); I != E; ++I) {
+      BP->setEdgeWeight(BB, *I, retWeight);
     }
   }
+
+  return ReturningEdges.size() > 0;
 }
 
 // Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion
 // between two pointer or pointer and NULL will fail.
-void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
+bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
   BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
   if (!BI || !BI->isConditional())
-    return;
+    return false;
 
   Value *Cond = BI->getCondition();
   ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
   if (!CI || !CI->isEquality())
-    return;
+    return false;
 
   Value *LHS = CI->getOperand(0);
 
   if (!LHS->getType()->isPointerTy())
-    return;
+    return false;
 
   assert(CI->getOperand(1)->getType()->isPointerTy());
 
@@ -190,29 +246,35 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
   if (!isProb)
     std::swap(Taken, NonTaken);
 
-  incEdgeWeight(BB, Taken);
-  decEdgeWeight(BB, NonTaken);
+  BP->setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT);
+  BP->setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT);
+  return true;
 }
 
 // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
 // as taken, exiting edges as not-taken.
-void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
+bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
   uint32_t numSuccs = BB->getTerminator()->getNumSuccessors();
 
   Loop *L = LI->getLoopFor(BB);
   if (!L)
-    return;
+    return false;
+
+  SmallPtrSet<BasicBlock *, 8> BackEdges;
+  SmallPtrSet<BasicBlock *, 8> ExitingEdges;
+  SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from header to the loop.
 
-  SmallVector<BasicBlock *, 8> BackEdges;
-  SmallVector<BasicBlock *, 8> ExitingEdges;
+  bool isHeader = BB == L->getHeader();
 
   for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
     BasicBlock *Succ = *I;
     Loop *SuccL = LI->getLoopFor(Succ);
     if (SuccL != L)
-      ExitingEdges.push_back(Succ);
+      ExitingEdges.insert(Succ);
     else if (Succ == L->getHeader())
-      BackEdges.push_back(Succ);
+      BackEdges.insert(Succ);
+    else if (isHeader)
+      InEdges.insert(Succ);
   }
 
   if (uint32_t numBackEdges = BackEdges.size()) {
@@ -220,39 +282,121 @@ void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
     if (backWeight < NORMAL_WEIGHT)
       backWeight = NORMAL_WEIGHT;
 
-    for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
+    for (SmallPtrSet<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
          EE = BackEdges.end(); EI != EE; ++EI) {
       BasicBlock *Back = *EI;
       BP->setEdgeWeight(BB, Back, backWeight);
     }
   }
 
+  if (uint32_t numInEdges = InEdges.size()) {
+    uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges;
+    if (inWeight < NORMAL_WEIGHT)
+      inWeight = NORMAL_WEIGHT;
+
+    for (SmallPtrSet<BasicBlock *, 8>::iterator EI = InEdges.begin(),
+         EE = InEdges.end(); EI != EE; ++EI) {
+      BasicBlock *Back = *EI;
+      BP->setEdgeWeight(BB, Back, inWeight);
+    }
+  }
+
   uint32_t numExitingEdges = ExitingEdges.size();
   if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) {
     uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges;
     if (exitWeight < MIN_WEIGHT)
       exitWeight = MIN_WEIGHT;
 
-    for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
+    for (SmallPtrSet<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
          EE = ExitingEdges.end(); EI != EE; ++EI) {
       BasicBlock *Exiting = *EI;
       BP->setEdgeWeight(BB, Exiting, exitWeight);
     }
   }
+
+  return true;
 }
 
+bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) {
+  BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
+  if (!BI || !BI->isConditional())
+    return false;
+
+  Value *Cond = BI->getCondition();
+  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+  if (!CI)
+    return false;
+
+  Value *RHS = CI->getOperand(1);
+  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+  if (!CV)
+    return false;
+
+  bool isProb;
+  if (CV->isZero()) {
+    switch (CI->getPredicate()) {
+    case CmpInst::ICMP_EQ:
+      // X == 0   ->  Unlikely
+      isProb = false;
+      break;
+    case CmpInst::ICMP_NE:
+      // X != 0   ->  Likely
+      isProb = true;
+      break;
+    case CmpInst::ICMP_SLT:
+      // X < 0   ->  Unlikely
+      isProb = false;
+      break;
+    case CmpInst::ICMP_SGT:
+      // X > 0   ->  Likely
+      isProb = true;
+      break;
+    default:
+      return false;
+    }
+  } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
+    // InstCombine canonicalizes X <= 0 into X < 1.
+    // X <= 0   ->  Unlikely
+    isProb = false;
+  } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) {
+    // InstCombine canonicalizes X >= 0 into X > -1.
+    // X >= 0   ->  Likely
+    isProb = true;
+  } else {
+    return false;
+  }
+
+  BasicBlock *Taken = BI->getSuccessor(0);
+  BasicBlock *NonTaken = BI->getSuccessor(1);
+
+  if (!isProb)
+    std::swap(Taken, NonTaken);
+
+  BP->setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT);
+  BP->setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT);
+
+  return true;
+}
+
+
 bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
 
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
     BasicBlock *BB = I++;
 
-    // Only LBH uses setEdgeWeight method.
-    calcLoopBranchHeuristics(BB);
+    if (calcMetadataWeights(BB))
+      continue;
+
+    if (calcLoopBranchHeuristics(BB))
+      continue;
 
-    // PH and RH use only incEdgeWeight and decEwdgeWeight methods to
-    // not efface LBH results.
-    calcPointerHeuristics(BB);
-    calcReturnHeuristics(BB);
+    if (calcReturnHeuristics(BB))
+      continue;
+
+    if (calcPointerHeuristics(BB))
+      continue;
+
+    calcZeroHeuristics(BB);
   }
 
   return false;
@@ -265,15 +409,15 @@ void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
 
 bool BranchProbabilityInfo::runOnFunction(Function &F) {
   LoopInfo &LI = getAnalysis<LoopInfo>();
-  BranchProbabilityAnalysis BPA(&Weights, this, &LI);
+  BranchProbabilityAnalysis BPA(this, &LI);
   return BPA.runOnFunction(F);
 }
 
-uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const {
+uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
   uint32_t Sum = 0;
 
-  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
-    BasicBlock *Succ = *I;
+  for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+    const BasicBlock *Succ = *I;
     uint32_t Weight = getEdgeWeight(BB, Succ);
     uint32_t PrevSum = Sum;
 
@@ -284,7 +428,8 @@ uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const {
   return Sum;
 }
 
-bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const {
+bool BranchProbabilityInfo::
+isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
   // Hot probability is at least 4/5 = 80%
   uint32_t Weight = getEdgeWeight(Src, Dst);
   uint32_t Sum = getSumForBlock(Src);
@@ -321,8 +466,8 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
 }
 
 // Return edge's weight. If can't find it, return DEFAULT_WEIGHT value.
-uint32_t
-BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const {
+uint32_t BranchProbabilityInfo::
+getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
   Edge E(Src, Dst);
   DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E);
 
@@ -332,8 +477,8 @@ BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const {
   return DEFAULT_WEIGHT;
 }
 
-void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst,
-                                     uint32_t Weight) {
+void BranchProbabilityInfo::
+setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) {
   Weights[std::make_pair(Src, Dst)] = Weight;
   DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> "
                << Dst->getNameStr() << " weight to " << Weight
@@ -342,7 +487,7 @@ void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst,
 
 
 BranchProbability BranchProbabilityInfo::
-getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const {
+getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const {
 
   uint32_t N = getEdgeWeight(Src, Dst);
   uint32_t D = getSumForBlock(Src);
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index ab846a26b4..e79459d7a4 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -6,7 +6,7 @@ add_llvm_library(LLVMAnalysis
   AliasSetTracker.cpp
   Analysis.cpp
   BasicAliasAnalysis.cpp
-  BlockFrequency.cpp
+  BlockFrequencyInfo.cpp
   BranchProbabilityInfo.cpp
   CFGPrinter.cpp
   CaptureTracking.cpp
@@ -58,4 +58,10 @@ add_llvm_library(LLVMAnalysis
   ValueTracking.cpp
   )
 
+add_llvm_library_dependencies(LLVMAnalysis
+  LLVMCore
+  LLVMSupport
+  LLVMTarget
+  )
+
 add_subdirectory(IPA)
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index b60432be83..df79849c3c 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -45,8 +45,13 @@ using namespace llvm;
 /// ConstantExpr if unfoldable.
 static Constant *FoldBitCast(Constant *C, Type *DestTy,
                              const TargetData &TD) {
-  
-  // This only handles casts to vectors currently.
+  // Catch the obvious splat cases.
+  if (C->isNullValue() && !DestTy->isX86_MMXTy())
+    return Constant::getNullValue(DestTy);
+  if (C->isAllOnesValue() && !DestTy->isX86_MMXTy())
+    return Constant::getAllOnesValue(DestTy);
+
+  // The code below only handles casts to vectors currently.
   VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
   if (DestVTy == 0)
     return ConstantExpr::getBitCast(C, DestTy);
@@ -547,8 +552,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
   for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
     if ((i == 1 ||
          !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
-                                                            Ops.data() + 1,
-                                                            i-1))) &&
+                                                        Ops.slice(1, i-1)))) &&
         Ops[i]->getType() != IntPtrTy) {
       Any = true;
       NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -562,7 +566,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
   if (!Any) return 0;
 
   Constant *C =
-    ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size());
+    ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
       C = Folded;
@@ -702,7 +706,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
 
   // Create a GEP.
   Constant *C =
-    ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size());
+    ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
   assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
          "Computed GetElementPtr has unexpected type!");
 
@@ -889,8 +893,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
     if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD))
       return C;
     
-    return ConstantExpr::getGetElementPtr(Ops[0], Ops.data() + 1,
-                                          Ops.size() - 1);
+    return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
   }
 }
 
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index da5780827a..bfa429d541 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -29,14 +29,74 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
 }
 
 DIBuilder::DIBuilder(Module &m)
-  : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {}
+  : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0),
+    TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0),
+    ValueFn(0)
+{}
+
+/// finalize - Construct any deferred debug info descriptors.
+void DIBuilder::finalize() {
+  DIArray Enums = getOrCreateArray(AllEnumTypes);
+  DIType(TempEnumTypes).replaceAllUsesWith(Enums);
+
+  DIArray RetainTypes = getOrCreateArray(AllRetainTypes);
+  DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
+
+  DIArray SPs = getOrCreateArray(AllSubprograms);
+  DIType(TempSubprograms).replaceAllUsesWith(SPs);
+  for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+    DISubprogram SP(SPs.getElement(i));
+    if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) {
+      SmallVector<Value *, 4> Variables;
+      for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii)
+        Variables.push_back(NMD->getOperand(ii));
+      if (MDNode *Temp = SP.getVariablesNodes()) {
+        DIArray AV = getOrCreateArray(Variables);
+        DIType(Temp).replaceAllUsesWith(AV);
+      }
+      NMD->eraseFromParent();
+    }
+  }
+
+  DIArray GVs = getOrCreateArray(AllGVs);
+  DIType(TempGVs).replaceAllUsesWith(GVs);
+}
+
+/// getNonCompileUnitScope - If N is compile unit return NULL otherwise return
+/// N.
+static MDNode *getNonCompileUnitScope(MDNode *N) {
+  if (DIDescriptor(N).isCompileUnit())
+    return NULL;
+  return N;
+}
 
 /// createCompileUnit - A CompileUnit provides an anchor for all debugging
 /// information generated during this instance of compilation.
-void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, 
-                                  StringRef Directory, StringRef Producer, 
-                                  bool isOptimized, StringRef Flags, 
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
+                                  StringRef Directory, StringRef Producer,
+                                  bool isOptimized, StringRef Flags,
                                   unsigned RunTimeVer) {
+  assert (Lang <= dwarf::DW_LANG_D && Lang >= dwarf::DW_LANG_C89
+	  && "Invalid Language tag");
+  assert (!Filename.empty() 
+	  && "Unable to create compile unit without filename");
+  Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+  TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
+  Value *THElts[] = { TempEnumTypes };
+  MDNode *EnumHolder = MDNode::get(VMContext, THElts);
+
+  TempRetainTypes = MDNode::getTemporary(VMContext, TElts);
+  Value *TRElts[] = { TempRetainTypes };
+  MDNode *RetainHolder = MDNode::get(VMContext, TRElts);
+
+  TempSubprograms = MDNode::getTemporary(VMContext, TElts);
+  Value *TSElts[] = { TempSubprograms };
+  MDNode *SPHolder = MDNode::get(VMContext, TSElts);
+
+  TempGVs = MDNode::getTemporary(VMContext, TElts);
+  Value *TVElts[] = { TempGVs };
+  MDNode *GVHolder = MDNode::get(VMContext, TVElts);
+
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -48,7 +108,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
     ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
     MDString::get(VMContext, Flags),
-    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
+    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer),
+    EnumHolder,
+    RetainHolder,
+    SPHolder,
+    GVHolder
   };
   TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
 
@@ -61,17 +125,19 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
 /// for a file.
 DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
   assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
+  assert(!Filename.empty() && "Unable to create file without name");
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
     MDString::get(VMContext, Filename),
     MDString::get(VMContext, Directory),
-    TheCU
+    NULL // TheCU
   };
   return DIFile(MDNode::get(VMContext, Elts));
 }
 
 /// createEnumerator - Create a single enumerator value.
 DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
+  assert(!Name.empty() && "Unable to create enumerator without name");
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
     MDString::get(VMContext, Name),
@@ -80,16 +146,37 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
   return DIEnumerator(MDNode::get(VMContext, Elts));
 }
 
-/// createBasicType - Create debugging information entry for a basic 
+/// createNullPtrType - Create C++0x nullptr type.
+DIType DIBuilder::createNullPtrType(StringRef Name) {
+  assert(!Name.empty() && "Unable to create type without name");
+  // nullptr is encoded in DIBasicType format. Line number, filename,
+  // ,size, alignment, offset and flags are always empty here.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
+    NULL, //TheCU,
+    MDString::get(VMContext, Name),
+    NULL, // Filename
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Encoding
+  };
+  return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createBasicType - Create debugging information entry for a basic
 /// type, e.g 'char'.
-DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, 
+DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
                                   uint64_t AlignInBits,
                                   unsigned Encoding) {
+  assert(!Name.empty() && "Unable to create type without name");
   // Basic types are encoded in DIBasicType format. Line number, filename,
   // offset and flags are always empty here.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
-    TheCU,
+    NULL, //TheCU,
     MDString::get(VMContext, Name),
     NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
@@ -108,7 +195,7 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
   // Qualified types are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, Tag),
-    TheCU,
+    NULL, //TheCU,
     MDString::get(VMContext, StringRef()), // Empty name.
     NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
@@ -127,7 +214,7 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
   // Pointer types are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
-    TheCU,
+    NULL, //TheCU,
     MDString::get(VMContext, Name),
     NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
@@ -142,10 +229,11 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
 
 /// createReferenceType - Create debugging information entry for a reference.
 DIType DIBuilder::createReferenceType(DIType RTy) {
+  assert(RTy.Verify() && "Unable to create reference type");
   // References are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_reference_type),
-    TheCU,
+    NULL, // TheCU,
     NULL, // Name
     NULL, // Filename
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
@@ -165,7 +253,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
   assert(Ty.Verify() && "Invalid typedef type!");
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
-    Context,
+    getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
@@ -199,9 +287,10 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
 }
 
 /// createInheritance - Create debugging information entry to establish
-/// inheritnace relationship between two types.
-DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, 
+/// inheritance relationship between two types.
+DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
                                     uint64_t BaseOffset, unsigned Flags) {
+  assert(Ty.Verify() && "Unable to create inheritance");
   // TAG_inheritance is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
@@ -219,15 +308,15 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
 }
 
 /// createMemberType - Create debugging information entry for a member.
-DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, 
-                                   DIFile File, unsigned LineNumber, 
+DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
+                                   DIFile File, unsigned LineNumber,
                                    uint64_t SizeInBits, uint64_t AlignInBits,
-                                   uint64_t OffsetInBits, unsigned Flags, 
+                                   uint64_t OffsetInBits, unsigned Flags,
                                    DIType Ty) {
   // TAG_member is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_member),
-    Scope,
+    getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -242,17 +331,17 @@ DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
 
 /// createObjCIVar - Create debugging information entry for Objective-C
 /// instance variable.
-DIType DIBuilder::createObjCIVar(StringRef Name, 
-                                 DIFile File, unsigned LineNumber, 
+DIType DIBuilder::createObjCIVar(StringRef Name,
+                                 DIFile File, unsigned LineNumber,
                                  uint64_t SizeInBits, uint64_t AlignInBits,
-                                 uint64_t OffsetInBits, unsigned Flags, 
+                                 uint64_t OffsetInBits, unsigned Flags,
                                  DIType Ty, StringRef PropertyName,
                                  StringRef GetterName, StringRef SetterName,
                                  unsigned PropertyAttributes) {
   // TAG_member is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_member),
-    File, // Or TheCU ? Ty ?
+    getNonCompileUnitScope(File),
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -270,8 +359,8 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
 }
 
 /// createClassType - Create debugging information entry for a class.
-DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, 
-                                  DIFile File, unsigned LineNumber, 
+DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+                                  DIFile File, unsigned LineNumber,
                                   uint64_t SizeInBits, uint64_t AlignInBits,
                                   uint64_t OffsetInBits, unsigned Flags,
                                   DIType DerivedFrom, DIArray Elements,
@@ -279,7 +368,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
  // TAG_class_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
-    Context,
+    getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -298,13 +387,13 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
 
 /// createTemplateTypeParameter - Create debugging information for template
 /// type parameter.
-DITemplateTypeParameter 
+DITemplateTypeParameter
 DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
                                        DIType Ty, MDNode *File, unsigned LineNo,
                                        unsigned ColumnNo) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
-    Context,
+    getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     Ty,
     File,
@@ -316,14 +405,14 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
 
 /// createTemplateValueParameter - Create debugging information for template
 /// value parameter.
-DITemplateValueParameter 
+DITemplateValueParameter
 DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
                                         DIType Ty, uint64_t Val,
                                         MDNode *File, unsigned LineNo,
                                         unsigned ColumnNo) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
-    Context,
+    getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     Ty,
     ConstantInt::get(Type::getInt64Ty(VMContext), Val),
@@ -335,15 +424,15 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
 }
 
 /// createStructType - Create debugging information entry for a struct.
-DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, 
-                                   DIFile File, unsigned LineNumber, 
+DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
+                                   DIFile File, unsigned LineNumber,
                                    uint64_t SizeInBits, uint64_t AlignInBits,
-                                   unsigned Flags, DIArray Elements, 
+                                   unsigned Flags, DIArray Elements,
                                    unsigned RunTimeLang) {
  // TAG_structure_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
-    Context,
+    getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -360,7 +449,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
 }
 
 /// createUnionType - Create debugging information entry for an union.
-DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, 
+DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
                                   DIFile File,
                                   unsigned LineNumber, uint64_t SizeInBits,
                                   uint64_t AlignInBits, unsigned Flags,
@@ -368,7 +457,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
   // TAG_union_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
-    Scope,
+    getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -389,9 +478,9 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
   // TAG_subroutine_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
-    File,
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
     MDString::get(VMContext, ""),
-    File,
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     ConstantInt::get(Type::getInt64Ty(VMContext), 0),
     ConstantInt::get(Type::getInt64Ty(VMContext), 0),
@@ -405,16 +494,17 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
   return DIType(MDNode::get(VMContext, Elts));
 }
 
-/// createEnumerationType - Create debugging information entry for an 
+/// createEnumerationType - Create debugging information entry for an
 /// enumeration.
-DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, 
-                                        DIFile File, unsigned LineNumber, 
-                                        uint64_t SizeInBits, 
-                                        uint64_t AlignInBits, DIArray Elements) {
+DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
+                                        DIFile File, unsigned LineNumber,
+                                        uint64_t SizeInBits,
+                                        uint64_t AlignInBits,
+					DIArray Elements) {
   // TAG_enumeration_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
-    Scope,
+    getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -428,20 +518,19 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
   MDNode *Node = MDNode::get(VMContext, Elts);
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
-  NMD->addOperand(Node);
+  AllEnumTypes.push_back(Node);
   return DIType(Node);
 }
 
 /// createArrayType - Create debugging information entry for an array.
-DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, 
+DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
                                   DIType Ty, DIArray Subscripts) {
   // TAG_array_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
-    TheCU,
+    NULL, //TheCU,
     MDString::get(VMContext, ""),
-    TheCU,
+    NULL, //TheCU,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     ConstantInt::get(Type::getInt64Ty(VMContext), Size),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -456,14 +545,14 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
 }
 
 /// createVectorType - Create debugging information entry for a vector.
-DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, 
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
                                    DIType Ty, DIArray Subscripts) {
   // TAG_vector_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
-    TheCU,
+    NULL, //TheCU,
     MDString::get(VMContext, ""),
-    TheCU,
+    NULL, //TheCU,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     ConstantInt::get(Type::getInt64Ty(VMContext), Size),
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -501,18 +590,17 @@ DIType DIBuilder::createArtificialType(DIType Ty) {
   return DIType(MDNode::get(VMContext, Elts));
 }
 
-/// retainType - Retain DIType in a module even if it is not referenced 
+/// retainType - Retain DIType in a module even if it is not referenced
 /// through debug info anchors.
 void DIBuilder::retainType(DIType T) {
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
-  NMD->addOperand(T);
+  AllRetainTypes.push_back(T);
 }
 
 /// createUnspecifiedParameter - Create unspeicified type descriptor
 /// for the subroutine type.
 DIDescriptor DIBuilder::createUnspecifiedParameter() {
-  Value *Elts[] = { 
-    GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) 
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
   };
   return DIDescriptor(MDNode::get(VMContext, Elts));
 }
@@ -532,7 +620,7 @@ DIType DIBuilder::createTemporaryType(DIFile F) {
   // use here as long as DIType accepts it.
   Value *Elts[] = {
     GetTagConstant(VMContext, DW_TAG_base_type),
-    F.getCompileUnit(),
+    TheCU,
     NULL,
     F
   };
@@ -563,12 +651,12 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
 
 /// createGlobalVariable - Create a new descriptor for the specified global.
 DIGlobalVariable DIBuilder::
-createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, 
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
                      DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_variable),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    TheCU,
+    NULL, // TheCU,
     MDString::get(VMContext, Name),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, Name),
@@ -580,22 +668,20 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
     Val
   };
   MDNode *Node = MDNode::get(VMContext, Elts);
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
-  NMD->addOperand(Node);
+  AllGVs.push_back(Node);
   return DIGlobalVariable(Node);
 }
 
 /// createStaticVariable - Create a new descriptor for the specified static
 /// variable.
 DIGlobalVariable DIBuilder::
-createStaticVariable(DIDescriptor Context, StringRef Name, 
-                     StringRef LinkageName, DIFile F, unsigned LineNumber, 
+createStaticVariable(DIDescriptor Context, StringRef Name,
+                     StringRef LinkageName, DIFile F, unsigned LineNumber,
                      DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_variable),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context,
+    getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, LinkageName),
@@ -607,21 +693,19 @@ createStaticVariable(DIDescriptor Context, StringRef Name,
     Val
   };
   MDNode *Node = MDNode::get(VMContext, Elts);
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
-  NMD->addOperand(Node);
+  AllGVs.push_back(Node);
   return DIGlobalVariable(Node);
 }
 
 /// createVariable - Create a new descriptor for the specified variable.
 DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
                                           StringRef Name, DIFile File,
-                                          unsigned LineNo, DIType Ty, 
+                                          unsigned LineNo, DIType Ty,
                                           bool AlwaysPreserve, unsigned Flags,
                                           unsigned ArgNo) {
   Value *Elts[] = {
     GetTagConstant(VMContext, Tag),
-    Scope,
+    getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
@@ -635,13 +719,7 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
     // to preserve variable info in such situation then stash it in a
     // named mdnode.
     DISubprogram Fn(getDISubprogram(Scope));
-    StringRef FName = "fn";
-    if (Fn.getFunction())
-      FName = Fn.getFunction()->getName();
-    char One = '\1';
-    if (FName.startswith(StringRef(&One, 1)))
-      FName = FName.substr(1);
-    NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
+    NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn);
     FnLocals->addOperand(Node);
   }
   return DIVariable(Node);
@@ -656,10 +734,11 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
                                             unsigned ArgNo) {
   SmallVector<Value *, 15> Elts;
   Elts.push_back(GetTagConstant(VMContext, Tag));
-  Elts.push_back(Scope);
+  Elts.push_back(getNonCompileUnitScope(Scope)),
   Elts.push_back(MDString::get(VMContext, Name));
   Elts.push_back(F);
-  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))));
+  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
+				  (LineNo | (ArgNo << 24))));
   Elts.push_back(Ty);
   Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
   Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
@@ -679,10 +758,15 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
                                        Function *Fn,
                                        MDNode *TParams,
                                        MDNode *Decl) {
+  Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+  MDNode *Temp = MDNode::getTemporary(VMContext, TElts);
+  Value *TVElts[] = { Temp };
+  MDNode *THolder = MDNode::get(VMContext, TVElts);
+
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context,
+    getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, LinkageName),
@@ -698,13 +782,13 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
     Fn,
     TParams,
-    Decl
+    Decl,
+    THolder
   };
   MDNode *Node = MDNode::get(VMContext, Elts);
 
   // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
-  NMD->addOperand(Node);
+  AllSubprograms.push_back(Node);
   return DISubprogram(Node);
 }
 
@@ -722,10 +806,15 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
                                      bool isOptimized,
                                      Function *Fn,
                                      MDNode *TParam) {
+  Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+  MDNode *Temp = MDNode::getTemporary(VMContext, TElts);
+  Value *TVElts[] = { Temp };
+  MDNode *THolder = MDNode::get(VMContext, TVElts);
+
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context,
+    getNonCompileUnitScope(Context),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, Name),
     MDString::get(VMContext, LinkageName),
@@ -741,12 +830,10 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
     Fn,
     TParam,
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    THolder
   };
   MDNode *Node = MDNode::get(VMContext, Elts);
-
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
-  NMD->addOperand(Node);
   return DISubprogram(Node);
 }
 
@@ -756,7 +843,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
                                        DIFile File, unsigned LineNo) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
-    Scope,
+    getNonCompileUnitScope(Scope),
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
@@ -764,13 +851,25 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
   return DINameSpace(MDNode::get(VMContext, Elts));
 }
 
+/// createLexicalBlockFile - This creates a new MDNode that encapsulates
+/// an existing scope with a new filename.
+DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
+						     DIFile File) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+    Scope,
+    File
+  };
+  return DILexicalBlockFile(MDNode::get(VMContext, Elts));
+}
+
 DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
                                              unsigned Line, unsigned Col) {
   // Defeat MDNode uniqing for lexical blocks by using unique id.
   static unsigned int unique_id = 0;
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
-    Scope,
+    getNonCompileUnitScope(Scope),
     ConstantInt::get(Type::getInt32Ty(VMContext), Line),
     ConstantInt::get(Type::getInt32Ty(VMContext), Col),
     File,
@@ -838,4 +937,3 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
                     VarInfo };
   return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
 }
-
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index b23c3514d0..cd832abeba 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -171,7 +171,7 @@ static bool getLocationInfo(const Value *V, std::string &DisplayName,
 
 void PrintDbgInfo::printVariableDeclaration(const Value *V) {
   std::string DisplayName, File, Directory, Type;
-  unsigned LineNo;
+  unsigned LineNo = 0;
 
   if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
     return;
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 4dcf04ef21..640ad95e5c 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -39,6 +39,9 @@ DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
 DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
 }
 
+DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) {
+}
+
 DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
 }
 
@@ -116,6 +119,12 @@ unsigned DIVariable::getNumAddrElements() const {
   return DbgNode->getNumOperands()-8;
 }
 
+/// getInlinedAt - If this variable is inlined then return inline location.
+MDNode *DIVariable::getInlinedAt() const {
+  if (getVersion() <= llvm::LLVMDebugVersion9)
+    return NULL;
+  return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7));
+}
 
 //===----------------------------------------------------------------------===//
 // Predicates
@@ -124,7 +133,14 @@ unsigned DIVariable::getNumAddrElements() const {
 /// isBasicType - Return true if the specified tag is legal for
 /// DIBasicType.
 bool DIDescriptor::isBasicType() const {
-  return DbgNode && getTag() == dwarf::DW_TAG_base_type;
+  if (!DbgNode) return false;
+  switch (getTag()) {
+  case dwarf::DW_TAG_base_type:
+  case dwarf::DW_TAG_unspecified_type:
+    return true;
+  default:
+    return false;
+  }
 }
 
 /// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
@@ -250,9 +266,17 @@ bool DIDescriptor::isNameSpace() const {
   return DbgNode && getTag() == dwarf::DW_TAG_namespace;
 }
 
+/// isLexicalBlockFile - Return true if the specified descriptor is a
+/// lexical block with an extra file.
+bool DIDescriptor::isLexicalBlockFile() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+    (DbgNode->getNumOperands() == 3);
+}
+
 /// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
 bool DIDescriptor::isLexicalBlock() const {
-  return DbgNode && getTag() == dwarf::DW_TAG_lexical_block;
+  return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+    (DbgNode->getNumOperands() > 3);
 }
 
 /// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
@@ -322,6 +346,22 @@ void DIType::replaceAllUsesWith(MDNode *D) {
   }
 }
 
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+bool DIType::isUnsignedDIType() {
+  DIDerivedType DTy(DbgNode);
+  if (DTy.Verify())
+    return DTy.getTypeDerivedFrom().isUnsignedDIType();
+
+  DIBasicType BTy(DbgNode);
+  if (BTy.Verify()) {
+    unsigned Encoding = BTy.getEncoding();
+    if (Encoding == dwarf::DW_ATE_unsigned ||
+        Encoding == dwarf::DW_ATE_unsigned_char)
+      return true;
+  }
+  return false;
+}
+
 /// Verify - Verify that a compile unit is well formed.
 bool DICompileUnit::Verify() const {
   if (!DbgNode)
@@ -337,7 +377,7 @@ bool DICompileUnit::Verify() const {
 bool DIType::Verify() const {
   if (!DbgNode)
     return false;
-  if (!getContext().Verify())
+  if (getContext() && !getContext().Verify())
     return false;
   unsigned Tag = getTag();
   if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
@@ -345,6 +385,7 @@ bool DIType::Verify() const {
       Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type 
       && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type
       && Tag != dwarf::DW_TAG_enumeration_type 
+      && Tag != dwarf::DW_TAG_subroutine_type
       && getFilename().empty())
     return false;
   return true;
@@ -364,12 +405,9 @@ bool DIDerivedType::Verify() const {
 bool DICompositeType::Verify() const {
   if (!DbgNode)
     return false;
-  if (!getContext().Verify())
+  if (getContext() && !getContext().Verify())
     return false;
 
-  DICompileUnit CU = getCompileUnit();
-  if (!CU.Verify())
-    return false;
   return true;
 }
 
@@ -378,11 +416,7 @@ bool DISubprogram::Verify() const {
   if (!DbgNode)
     return false;
 
-  if (!getContext().Verify())
-    return false;
-
-  DICompileUnit CU = getCompileUnit();
-  if (!CU.Verify())
+  if (getContext() && !getContext().Verify())
     return false;
 
   DICompositeType Ty = getType();
@@ -399,11 +433,7 @@ bool DIGlobalVariable::Verify() const {
   if (getDisplayName().empty())
     return false;
 
-  if (!getContext().Verify())
-    return false;
-
-  DICompileUnit CU = getCompileUnit();
-  if (!CU.Verify())
+  if (getContext() && !getContext().Verify())
     return false;
 
   DIType Ty = getType();
@@ -421,10 +451,7 @@ bool DIVariable::Verify() const {
   if (!DbgNode)
     return false;
 
-  if (!getContext().Verify())
-    return false;
-
-  if (!getCompileUnit().Verify())
+  if (getContext() && !getContext().Verify())
     return false;
 
   DIType Ty = getType();
@@ -448,8 +475,6 @@ bool DINameSpace::Verify() const {
     return false;
   if (getName().empty())
     return false;
-  if (!getCompileUnit().Verify())
-    return false;
   return true;
 }
 
@@ -506,9 +531,28 @@ unsigned DISubprogram::isOptimized() const {
   return 0;
 }
 
+MDNode *DISubprogram::getVariablesNodes() const {
+  if (!DbgNode || DbgNode->getNumOperands() <= 19)
+    return NULL;
+  if (MDNode *Temp = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
+    return dyn_cast_or_null<MDNode>(Temp->getOperand(0));
+  return NULL;
+}
+
+DIArray DISubprogram::getVariables() const {
+  if (!DbgNode || DbgNode->getNumOperands() <= 19)
+    return DIArray();
+  if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(T->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
 StringRef DIScope::getFilename() const {
   if (!DbgNode)
     return StringRef();
+  if (isLexicalBlockFile())
+    return DILexicalBlockFile(DbgNode).getFilename();
   if (isLexicalBlock())
     return DILexicalBlock(DbgNode).getFilename();
   if (isSubprogram())
@@ -528,6 +572,8 @@ StringRef DIScope::getFilename() const {
 StringRef DIScope::getDirectory() const {
   if (!DbgNode)
     return StringRef();
+  if (isLexicalBlockFile())
+    return DILexicalBlockFile(DbgNode).getDirectory();
   if (isLexicalBlock())
     return DILexicalBlock(DbgNode).getDirectory();
   if (isSubprogram())
@@ -544,6 +590,47 @@ StringRef DIScope::getDirectory() const {
   return StringRef();
 }
 
+DIArray DICompileUnit::getEnumTypes() const {
+  if (!DbgNode || DbgNode->getNumOperands() < 14)
+    return DIArray();
+
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
+DIArray DICompileUnit::getRetainedTypes() const {
+  if (!DbgNode || DbgNode->getNumOperands() < 14)
+    return DIArray();
+
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
+DIArray DICompileUnit::getSubprograms() const {
+  if (!DbgNode || DbgNode->getNumOperands() < 14)
+    return DIArray();
+
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
+
+DIArray DICompileUnit::getGlobalVariables() const {
+  if (!DbgNode || DbgNode->getNumOperands() < 14)
+    return DIArray();
+
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
 //===----------------------------------------------------------------------===//
 // DIDescriptor: dump routines for all descriptors.
 //===----------------------------------------------------------------------===//
@@ -575,7 +662,6 @@ void DIType::print(raw_ostream &OS) const {
   OS << " [" << dwarf::TagString(Tag) << "] ";
 
   // TODO : Print context
-  getCompileUnit().print(OS);
   OS << " ["
          << "line " << getLineNumber() << ", "
          << getSizeInBits() << " bits, "
@@ -631,7 +717,6 @@ void DISubprogram::print(raw_ostream &OS) const {
   OS << " [" << dwarf::TagString(Tag) << "] ";
 
   // TODO : Print context
-  getCompileUnit().print(OS);
   OS << " [" << getLineNumber() << "] ";
 
   if (isLocalToUnit())
@@ -654,7 +739,6 @@ void DIGlobalVariable::print(raw_ostream &OS) const {
   OS << " [" << dwarf::TagString(Tag) << "] ";
 
   // TODO : Print context
-  getCompileUnit().print(OS);
   OS << " [" << getLineNumber() << "] ";
 
   if (isLocalToUnit())
@@ -668,13 +752,48 @@ void DIGlobalVariable::print(raw_ostream &OS) const {
   OS << "]\n";
 }
 
+static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
+                          const LLVMContext &Ctx) {
+  if (!DL.isUnknown()) {          // Print source line info.
+    DIScope Scope(DL.getScope(Ctx));
+    // Omit the directory, because it's likely to be long and uninteresting.
+    if (Scope.Verify())
+      CommentOS << Scope.getFilename();
+    else
+      CommentOS << "<unknown>";
+    CommentOS << ':' << DL.getLine();
+    if (DL.getCol() != 0)
+      CommentOS << ':' << DL.getCol();
+    DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+    if (!InlinedAtDL.isUnknown()) {
+      CommentOS << " @[ ";
+      printDebugLoc(InlinedAtDL, CommentOS, Ctx);
+      CommentOS << " ]";
+    }
+  }
+}
+
+void DIVariable::printExtendedName(raw_ostream &OS) const {
+  const LLVMContext &Ctx = DbgNode->getContext();
+  StringRef Res = getName();
+  if (!Res.empty())
+    OS << Res << "," << getLineNumber();
+  if (MDNode *InlinedAt = getInlinedAt()) {
+    DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
+    if (!InlinedAtDL.isUnknown()) {
+      OS << " @[";
+      printDebugLoc(InlinedAtDL, OS, Ctx);
+      OS << "]";
+    }
+  }
+}
+
 /// print - Print variable.
 void DIVariable::print(raw_ostream &OS) const {
   StringRef Res = getName();
   if (!Res.empty())
     OS << " [" << Res << "] ";
 
-  getCompileUnit().print(OS);
   OS << " [" << getLineNumber() << "] ";
   getType().print(OS);
   OS << "\n";
@@ -746,19 +865,34 @@ static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
 
 /// getFnSpecificMDNode - Return a NameMDNode, if available, that is 
 /// suitable to hold function specific information.
-NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
   SmallString<32> Name = StringRef("llvm.dbg.lv.");
-  fixupObjcLikeName(FuncName, Name);
-
+  StringRef FName = "fn";
+  if (Fn.getFunction())
+    FName = Fn.getFunction()->getName();
+  else
+    FName = Fn.getName();
+  char One = '\1';
+  if (FName.startswith(StringRef(&One, 1)))
+    FName = FName.substr(1);
+  fixupObjcLikeName(FName, Name);
   return M.getNamedMetadata(Name.str());
 }
 
 /// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
 /// to hold function specific information.
-NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
   SmallString<32> Name = StringRef("llvm.dbg.lv.");
-  fixupObjcLikeName(FuncName, Name);
-
+  StringRef FName = "fn";
+  if (Fn.getFunction())
+    FName = Fn.getFunction()->getName();
+  else
+    FName = Fn.getName();
+  char One = '\1';
+  if (FName.startswith(StringRef(&One, 1)))
+    FName = FName.substr(1);
+  fixupObjcLikeName(FName, Name);
+  
   return M.getOrInsertNamedMetadata(Name.str());
 }
 
@@ -793,6 +927,31 @@ DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
 
 /// processModule - Process entire module and collect debug info.
 void DebugInfoFinder::processModule(Module &M) {
+  if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
+    for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+      DICompileUnit CU(CU_Nodes->getOperand(i));
+      addCompileUnit(CU);
+      if (CU.getVersion() > LLVMDebugVersion10) {
+	DIArray GVs = CU.getGlobalVariables();
+	for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
+	  DIGlobalVariable DIG(GVs.getElement(i));
+	  if (addGlobalVariable(DIG))
+	    processType(DIG.getType());
+	}
+	DIArray SPs = CU.getSubprograms();
+	for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+	  processSubprogram(DISubprogram(SPs.getElement(i)));
+	DIArray EnumTypes = CU.getEnumTypes();
+	for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+	  processType(DIType(EnumTypes.getElement(i)));
+	DIArray RetainedTypes = CU.getRetainedTypes();
+	for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+	  processType(DIType(RetainedTypes.getElement(i)));
+	return;
+      }
+    }
+  }
+
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
     for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
       for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
@@ -811,6 +970,10 @@ void DebugInfoFinder::processModule(Module &M) {
           addCompileUnit(DICompileUnit(Scope));
         else if (Scope.isSubprogram())
           processSubprogram(DISubprogram(Scope));
+        else if (Scope.isLexicalBlockFile()) {
+          DILexicalBlockFile DBF = DILexicalBlockFile(Scope);
+          processLexicalBlock(DILexicalBlock(DBF.getScope()));
+        }
         else if (Scope.isLexicalBlock())
           processLexicalBlock(DILexicalBlock(Scope));
 
@@ -822,7 +985,8 @@ void DebugInfoFinder::processModule(Module &M) {
     for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
       DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
       if (addGlobalVariable(DIG)) {
-        addCompileUnit(DIG.getCompileUnit());
+        if (DIG.getVersion() <= LLVMDebugVersion10)
+          addCompileUnit(DIG.getCompileUnit());
         processType(DIG.getType());
       }
     }
@@ -843,6 +1007,10 @@ void DebugInfoFinder::processLocation(DILocation Loc) {
     processSubprogram(DISubprogram(S));
   else if (S.isLexicalBlock())
     processLexicalBlock(DILexicalBlock(S));
+  else if (S.isLexicalBlockFile()) {
+    DILexicalBlockFile DBF = DILexicalBlockFile(S);
+    processLexicalBlock(DILexicalBlock(DBF.getScope()));
+  }
   processLocation(Loc.getOrigLocation());
 }
 
@@ -850,8 +1018,8 @@ void DebugInfoFinder::processLocation(DILocation Loc) {
 void DebugInfoFinder::processType(DIType DT) {
   if (!addType(DT))
     return;
-
-  addCompileUnit(DT.getCompileUnit());
+  if (DT.getVersion() <= LLVMDebugVersion10)
+    addCompileUnit(DT.getCompileUnit());
   if (DT.isCompositeType()) {
     DICompositeType DCT(DT);
     processType(DCT.getTypeDerivedFrom());
@@ -874,6 +1042,10 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
   DIScope Context = LB.getContext();
   if (Context.isLexicalBlock())
     return processLexicalBlock(DILexicalBlock(Context));
+  else if (Context.isLexicalBlockFile()) {
+    DILexicalBlockFile DBF = DILexicalBlockFile(Context);
+    return processLexicalBlock(DILexicalBlock(DBF.getScope()));
+  }
   else
     return processSubprogram(DISubprogram(Context));
 }
@@ -882,7 +1054,8 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
 void DebugInfoFinder::processSubprogram(DISubprogram SP) {
   if (!addSubprogram(SP))
     return;
-  addCompileUnit(SP.getCompileUnit());
+  if (SP.getVersion() <= LLVMDebugVersion10)
+    addCompileUnit(SP.getCompileUnit());
   processType(SP.getType());
 }
 
@@ -897,8 +1070,8 @@ void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
 
   if (!NodesSeen.insert(DV))
     return;
-
-  addCompileUnit(DIVariable(N).getCompileUnit());
+  if (DIVariable(N).getVersion() <= LLVMDebugVersion10)
+    addCompileUnit(DIVariable(N).getCompileUnit());
   processType(DIVariable(N).getType());
 }
 
@@ -956,6 +1129,9 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
   if (D.isSubprogram())
     return DISubprogram(Scope);
 
+  if (D.isLexicalBlockFile())
+    return getDISubprogram(DILexicalBlockFile(Scope).getContext());
+  
   if (D.isLexicalBlock())
     return getDISubprogram(DILexicalBlock(Scope).getContext());
 
@@ -972,3 +1148,17 @@ DICompositeType llvm::getDICompositeType(DIType T) {
 
   return DICompositeType();
 }
+
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+bool llvm::isSubprogramContext(const MDNode *Context) {
+  if (!Context)
+    return false;
+  DIDescriptor D(Context);
+  if (D.isSubprogram())
+    return true;
+  if (D.isType())
+    return isSubprogramContext(DIType(Context).getContext());
+  return false;
+}
+
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 8ffef29870..eae83fdc36 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -5,3 +5,9 @@ add_llvm_library(LLVMipa
   GlobalsModRef.cpp
   IPA.cpp
   )
+
+add_llvm_library_dependencies(LLVMipa
+  LLVMAnalysis
+  LLVMCore
+  LLVMSupport
+  )
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 659ffab0c6..963da75234 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -44,8 +44,8 @@ namespace {
 class CGPassManager : public ModulePass, public PMDataManager {
 public:
   static char ID;
-  explicit CGPassManager(int Depth) 
-    : ModulePass(ID), PMDataManager(Depth) { }
+  explicit CGPassManager() 
+    : ModulePass(ID), PMDataManager() { }
 
   /// run - Execute all of the passes scheduled for execution.  Keep track of
   /// whether any of the passes modifies the module, and if so, return true.
@@ -350,6 +350,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
            dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
          }
         );
+  (void)MadeChange;
 
   return DevirtualizedCall;
 }
@@ -542,7 +543,7 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
     PMDataManager *PMD = PMS.top();
 
     // [1] Create new Call Graph Pass Manager
-    CGP = new CGPassManager(PMD->getDepth() + 1);
+    CGP = new CGPassManager();
 
     // [2] Set up new manager's top level manager
     PMTopLevelManager *TPM = PMD->getTopLevelManager();
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index e5f0a77ab6..d0ca8920ab 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -146,7 +146,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
                                    ISE, User, I,
                                    NewUse.PostIncLoops,
                                    *SE, *DT);
-      DEBUG(dbgs() << "   NORMALIZED TO: " << *ISE << '\n');
+      DEBUG(if (SE->getSCEV(I) != ISE)
+              dbgs() << "   NORMALIZED TO: " << *ISE << '\n');
     }
   }
   return true;
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index efde5984c1..40ac9a211a 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/CallingConv.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/ADT/SmallPtrSet.h"
 
 using namespace llvm;
@@ -24,13 +25,13 @@ using namespace llvm;
 /// TODO: Perhaps calls like memcpy, strcpy, etc?
 bool llvm::callIsSmall(const Function *F) {
   if (!F) return false;
-  
+
   if (F->hasLocalLinkage()) return false;
-  
+
   if (!F->hasName()) return false;
-  
+
   StringRef Name = F->getName();
-  
+
   // These will all likely lower to a single selection DAG node.
   if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
       Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
@@ -38,7 +39,7 @@ bool llvm::callIsSmall(const Function *F) {
       Name == "cos" || Name == "cosf" || Name == "cosl" ||
       Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
     return true;
-  
+
   // These are all likely to be optimized into something smaller.
   if (Name == "pow" || Name == "powf" || Name == "powl" ||
       Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
@@ -46,13 +47,14 @@ bool llvm::callIsSmall(const Function *F) {
       Name == "round" || Name == "ffs" || Name == "ffsl" ||
       Name == "abs" || Name == "labs" || Name == "llabs")
     return true;
-  
+
   return false;
 }
 
 /// analyzeBasicBlock - Fill in the current structure with information gleaned
 /// from the specified block.
-void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
+                                    const TargetData *TD) {
   ++NumBlocks;
   unsigned NumInstsBeforeThisBB = NumInsts;
   for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
@@ -67,8 +69,8 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
       ImmutableCallSite CS(cast<Instruction>(II));
 
       if (const Function *F = CS.getCalledFunction()) {
-        // If a function is both internal and has a single use, then it is 
-        // extremely likely to get inlined in the future (it was probably 
+        // If a function is both internal and has a single use, then it is
+        // extremely likely to get inlined in the future (it was probably
         // exposed by an interleaved devirtualization pass).
         if (F->hasInternalLinkage() && F->hasOneUse())
           ++NumInlineCandidates;
@@ -91,20 +93,25 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
           ++NumCalls;
       }
     }
-    
+
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
       if (!AI->isStaticAlloca())
         this->usesDynamicAlloca = true;
     }
 
     if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
-      ++NumVectorInsts; 
-    
+      ++NumVectorInsts;
+
     if (const CastInst *CI = dyn_cast<CastInst>(II)) {
       // Noop casts, including ptr <-> int,  don't count.
-      if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || 
+      if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
           isa<PtrToIntInst>(CI))
         continue;
+      // trunc to a native type is free (assuming the target has compare and
+      // shift-right of the same width).
+      if (isa<TruncInst>(CI) && TD &&
+          TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
+        continue;
       // Result of a cmp instruction is often extended (to be used by other
       // cmp instructions, logical or return instructions). These are usually
       // nop on most sane targets.
@@ -119,10 +126,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
 
     ++NumInsts;
   }
-  
+
   if (isa<ReturnInst>(BB->getTerminator()))
     ++NumRets;
-  
+
   // We never want to inline functions that contain an indirectbr.  This is
   // incorrect because all the blockaddress's (in static global initializers
   // for example) would be referring to the original function, and this indirect
@@ -217,23 +224,23 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
 
 /// analyzeFunction - Fill in the current structure with information gleaned
 /// from the specified function.
-void CodeMetrics::analyzeFunction(Function *F) {
-  // If this function contains a call to setjmp or _setjmp, never inline
-  // it.  This is a hack because we depend on the user marking their local
-  // variables as volatile if they are live across a setjmp call, and they
-  // probably won't do this in callers.
-  if (F->callsFunctionThatReturnsTwice())
-    callsSetJmp = true;
+void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
+  // If this function contains a call that "returns twice" (e.g., setjmp or
+  // _setjmp), never inline it. This is a hack because we depend on the user
+  // marking their local variables as volatile if they are live across a setjmp
+  // call, and they probably won't do this in callers.
+  callsSetJmp = F->callsFunctionThatReturnsTwice();
 
   // Look at the size of the callee.
   for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
-    analyzeBasicBlock(&*BB);
+    analyzeBasicBlock(&*BB, TD);
 }
 
 /// analyzeFunction - Fill in the current structure with information gleaned
 /// from the specified function.
-void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
-  Metrics.analyzeFunction(F);
+void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F,
+                                                       const TargetData *TD) {
+  Metrics.analyzeFunction(F, TD);
 
   // A function with exactly one return has it removed during the inlining
   // process (see InlineFunction), so don't count it.
@@ -252,7 +259,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
 /// NeverInline - returns true if the function should never be inlined into
 /// any caller
 bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
-  return (Metrics.callsSetJmp || Metrics.isRecursive || 
+  return (Metrics.callsSetJmp || Metrics.isRecursive ||
           Metrics.containsIndirectBr);
 }
 // getSpecializationBonus - The heuristic used to determine the per-call
@@ -263,19 +270,19 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
 {
   if (Callee->mayBeOverridden())
     return 0;
-  
+
   int Bonus = 0;
   // If this function uses the coldcc calling convention, prefer not to
   // specialize it.
   if (Callee->getCallingConv() == CallingConv::Cold)
     Bonus -= InlineConstants::ColdccPenalty;
-  
+
   // Get information about the callee.
   FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-  
+
   // If we haven't calculated this information yet, do so now.
   if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee);
+    CalleeFI->analyzeFunction(Callee, TD);
 
   unsigned ArgNo = 0;
   unsigned i = 0;
@@ -286,7 +293,7 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
       Bonus += CountBonusForConstant(I);
     }
 
-  // Calls usually take a long time, so they make the specialization gain 
+  // Calls usually take a long time, so they make the specialization gain
   // smaller.
   Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
 
@@ -300,13 +307,13 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
 // inlining because we decide we don't want to give a bonus for
 // devirtualizing.
 int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
-  
+
   // This could just be NULL.
   if (!C) return 0;
-  
+
   Function *F = dyn_cast<Function>(C);
   if (!F) return 0;
-  
+
   int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
   return (Bonus > 0) ? 0 : Bonus;
 }
@@ -355,18 +362,18 @@ int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
         Bonus += CountBonusForConstant(&Inst);
     }
   }
-  
+
   return Bonus;
 }
 
 int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
   // Get information about the callee.
   FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-  
+
   // If we haven't calculated this information yet, do so now.
   if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee);
-  
+    CalleeFI->analyzeFunction(Callee, TD);
+
   // InlineCost - This value measures how good of an inline candidate this call
   // site is to inline.  A lower inline cost make is more likely for the call to
   // be inlined.  This value may go negative.
@@ -392,9 +399,9 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
     // weights calculated for the callee to determine how much will be folded
     // away with this information.
     else if (isa<Constant>(I))
-      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;       
+      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
   }
-  
+
   // Each argument passed in has a cost at both the caller and the callee
   // sides.  Measurements show that each argument costs about the same as an
   // instruction.
@@ -408,28 +415,28 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
 
   // Look at the size of the callee. Each instruction counts as 5.
   InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
-  
+
   return InlineCost;
 }
 
 int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
   // Get information about the callee.
   FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-  
+
   // If we haven't calculated this information yet, do so now.
   if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee);
-    
+    CalleeFI->analyzeFunction(Callee, TD);
+
   bool isDirectCall = CS.getCalledFunction() == Callee;
   Instruction *TheCall = CS.getInstruction();
   int Bonus = 0;
-  
+
   // If there is only one call of the function, and it has internal linkage,
   // make it almost guaranteed to be inlined.
   //
   if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
     Bonus += InlineConstants::LastCallToStaticBonus;
-  
+
   // If the instruction after the call, or if the normal destination of the
   // invoke is an unreachable instruction, the function is noreturn.  As such,
   // there is little point in inlining this.
@@ -438,12 +445,12 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
       Bonus += InlineConstants::NoreturnPenalty;
   } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
     Bonus += InlineConstants::NoreturnPenalty;
-  
+
   // If this function uses the coldcc calling convention, prefer not to inline
   // it.
   if (Callee->getCallingConv() == CallingConv::Cold)
     Bonus += InlineConstants::ColdccPenalty;
-  
+
   // Add to the inline quality for properties that make the call valuable to
   // inline.  This includes factors that indicate that the result of inlining
   // the function will be optimizable.  Currently this just looks at arguments
@@ -455,7 +462,7 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
     // Compute any constant bonus due to inlining we want to give here.
     if (isa<Constant>(I))
       Bonus += CountBonusForConstant(FI, cast<Constant>(I));
-      
+
   return Bonus;
 }
 
@@ -483,10 +490,10 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
 
   // Get information about the callee.
   FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-  
+
   // If we haven't calculated this information yet, do so now.
   if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee);
+    CalleeFI->analyzeFunction(Callee, TD);
 
   // If we should never inline this, return a huge cost.
   if (CalleeFI->NeverInline())
@@ -498,15 +505,15 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
   // requires handling setjmp somewhere else, however.
   if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
     return InlineCost::getAlways();
-    
+
   if (CalleeFI->Metrics.usesDynamicAlloca) {
     // Get information about the caller.
     FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
 
     // If we haven't calculated this information yet, do so now.
     if (CallerFI.Metrics.NumBlocks == 0) {
-      CallerFI.analyzeFunction(Caller);
-     
+      CallerFI.analyzeFunction(Caller, TD);
+
       // Recompute the CalleeFI pointer, getting Caller could have invalidated
       // it.
       CalleeFI = &CachedFunctionInfo[Callee];
@@ -538,16 +545,16 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
   // something else.
   if (Callee->mayBeOverridden())
     return llvm::InlineCost::getNever();
-  
+
   // Get information about the callee.
   FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-  
+
   // If we haven't calculated this information yet, do so now.
   if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee);
+    CalleeFI->analyzeFunction(Callee, TD);
 
   int Cost = 0;
-  
+
   // Look at the original size of the callee.  Each instruction counts as 5.
   Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
 
@@ -564,13 +571,13 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
 // higher threshold to determine if the function call should be inlined.
 float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
   Function *Callee = CS.getCalledFunction();
-  
+
   // Get information about the callee.
   FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
-  
+
   // If we haven't calculated this information yet, do so now.
   if (CalleeFI.Metrics.NumBlocks == 0)
-    CalleeFI.analyzeFunction(Callee);
+    CalleeFI.analyzeFunction(Callee, TD);
 
   float Factor = 1.0f;
   // Single BB functions are often written to be inlined.
@@ -604,7 +611,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
     --CallerMetrics.NumCalls;
 
   if (Callee == 0) return;
-  
+
   CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics;
 
   // If we don't have metrics for the callee, don't recalculate them just to
@@ -614,7 +621,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
     resetCachedCostInfo(Caller);
     return;
   }
-  
+
   // Since CalleeMetrics were already calculated, we know that the CallerMetrics
   // reference isn't invalidated: both were in the DenseMap.
   CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
@@ -636,7 +643,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
     CallerMetrics.NumInsts -= Callee->arg_size();
   else
     CallerMetrics.NumInsts = 0;
-  
+
   // We are not updating the argument weights. We have already determined that
   // Caller is a fairly large function, so we accept the loss of precision.
 }
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 740351ceb8..131cc97d23 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -48,6 +48,26 @@ static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
 static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
                               const DominatorTree *, unsigned);
 
+/// getFalse - For a boolean type, or a vector of boolean type, return false, or
+/// a vector with every element false, as appropriate for the type.
+static Constant *getFalse(Type *Ty) {
+  assert((Ty->isIntegerTy(1) ||
+          (Ty->isVectorTy() &&
+           cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+         "Expected i1 type or a vector of i1!");
+  return Constant::getNullValue(Ty);
+}
+
+/// getTrue - For a boolean type, or a vector of boolean type, return true, or
+/// a vector with every element true, as appropriate for the type.
+static Constant *getTrue(Type *Ty) {
+  assert((Ty->isIntegerTy(1) ||
+          (Ty->isVectorTy() &&
+           cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+         "Expected i1 type or a vector of i1!");
+  return Constant::getAllOnesValue(Ty);
+}
+
 /// ValueDominatesPHI - Does the given value dominate the specified phi node?
 static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
   Instruction *I = dyn_cast<Instruction>(V);
@@ -1478,48 +1498,46 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     default:
       assert(false && "Unknown ICmp predicate!");
     case ICmpInst::ICMP_ULT:
-      // getNullValue also works for vectors, unlike getFalse.
-      return Constant::getNullValue(ITy);
+      return getFalse(ITy);
     case ICmpInst::ICMP_UGE:
-      // getAllOnesValue also works for vectors, unlike getTrue.
-      return ConstantInt::getAllOnesValue(ITy);
+      return getTrue(ITy);
     case ICmpInst::ICMP_EQ:
     case ICmpInst::ICMP_ULE:
       if (isKnownNonZero(LHS, TD))
-        return Constant::getNullValue(ITy);
+        return getFalse(ITy);
       break;
     case ICmpInst::ICMP_NE:
     case ICmpInst::ICMP_UGT:
       if (isKnownNonZero(LHS, TD))
-        return ConstantInt::getAllOnesValue(ITy);
+        return getTrue(ITy);
       break;
     case ICmpInst::ICMP_SLT:
       ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
       if (LHSKnownNegative)
-        return ConstantInt::getAllOnesValue(ITy);
+        return getTrue(ITy);
       if (LHSKnownNonNegative)
-        return Constant::getNullValue(ITy);
+        return getFalse(ITy);
       break;
     case ICmpInst::ICMP_SLE:
       ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
       if (LHSKnownNegative)
-        return ConstantInt::getAllOnesValue(ITy);
+        return getTrue(ITy);
       if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
-        return Constant::getNullValue(ITy);
+        return getFalse(ITy);
       break;
     case ICmpInst::ICMP_SGE:
       ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
       if (LHSKnownNegative)
-        return Constant::getNullValue(ITy);
+        return getFalse(ITy);
       if (LHSKnownNonNegative)
-        return ConstantInt::getAllOnesValue(ITy);
+        return getTrue(ITy);
       break;
     case ICmpInst::ICMP_SGT:
       ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
       if (LHSKnownNegative)
-        return Constant::getNullValue(ITy);
+        return getFalse(ITy);
       if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
-        return ConstantInt::getAllOnesValue(ITy);
+        return getTrue(ITy);
       break;
     }
   }
@@ -1811,8 +1829,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     case ICmpInst::ICMP_EQ:
     case ICmpInst::ICMP_UGT:
     case ICmpInst::ICMP_UGE:
-      // getNullValue also works for vectors, unlike getFalse.
-      return Constant::getNullValue(ITy);
+      return getFalse(ITy);
     case ICmpInst::ICMP_SLT:
     case ICmpInst::ICMP_SLE:
       ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
@@ -1822,8 +1839,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     case ICmpInst::ICMP_NE:
     case ICmpInst::ICMP_ULT:
     case ICmpInst::ICMP_ULE:
-      // getAllOnesValue also works for vectors, unlike getTrue.
-      return Constant::getAllOnesValue(ITy);
+      return getTrue(ITy);
     }
   }
   if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
@@ -1840,8 +1856,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     case ICmpInst::ICMP_NE:
     case ICmpInst::ICMP_UGT:
     case ICmpInst::ICMP_UGE:
-      // getAllOnesValue also works for vectors, unlike getTrue.
-      return Constant::getAllOnesValue(ITy);
+      return getTrue(ITy);
     case ICmpInst::ICMP_SLT:
     case ICmpInst::ICMP_SLE:
       ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
@@ -1851,8 +1866,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     case ICmpInst::ICMP_EQ:
     case ICmpInst::ICMP_ULT:
     case ICmpInst::ICMP_ULE:
-      // getNullValue also works for vectors, unlike getFalse.
-      return Constant::getNullValue(ITy);
+      return getFalse(ITy);
     }
   }
 
@@ -1874,7 +1888,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         return V;
       break;
     case Instruction::Shl: {
-      bool NUW = LBO->hasNoUnsignedWrap() && LBO->hasNoUnsignedWrap();
+      bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
       bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
       if (!NUW && !NSW)
         break;
@@ -1955,10 +1969,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     }
     case CmpInst::ICMP_SGE:
       // Always true.
-      return Constant::getAllOnesValue(ITy);
+      return getTrue(ITy);
     case CmpInst::ICMP_SLT:
       // Always false.
-      return Constant::getNullValue(ITy);
+      return getFalse(ITy);
     }
   }
 
@@ -2025,10 +2039,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     }
     case CmpInst::ICMP_UGE:
       // Always true.
-      return Constant::getAllOnesValue(ITy);
+      return getTrue(ITy);
     case CmpInst::ICMP_ULT:
       // Always false.
-      return Constant::getNullValue(ITy);
+      return getFalse(ITy);
     }
   }
 
@@ -2040,40 +2054,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     // max(x, ?) pred min(x, ?).
     if (Pred == CmpInst::ICMP_SGE)
       // Always true.
-      return Constant::getAllOnesValue(ITy);
+      return getTrue(ITy);
     if (Pred == CmpInst::ICMP_SLT)
       // Always false.
-      return Constant::getNullValue(ITy);
+      return getFalse(ITy);
   } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
              match(RHS, m_SMax(m_Value(C), m_Value(D))) &&
              (A == C || A == D || B == C || B == D)) {
     // min(x, ?) pred max(x, ?).
     if (Pred == CmpInst::ICMP_SLE)
       // Always true.
-      return Constant::getAllOnesValue(ITy);
+      return getTrue(ITy);
     if (Pred == CmpInst::ICMP_SGT)
       // Always false.
-      return Constant::getNullValue(ITy);
+      return getFalse(ITy);
   } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
              match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
              (A == C || A == D || B == C || B == D)) {
     // max(x, ?) pred min(x, ?).
     if (Pred == CmpInst::ICMP_UGE)
       // Always true.
-      return Constant::getAllOnesValue(ITy);
+      return getTrue(ITy);
     if (Pred == CmpInst::ICMP_ULT)
       // Always false.
-      return Constant::getNullValue(ITy);
+      return getFalse(ITy);
   } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
              match(RHS, m_UMax(m_Value(C), m_Value(D))) &&
              (A == C || A == D || B == C || B == D)) {
     // min(x, ?) pred max(x, ?).
     if (Pred == CmpInst::ICMP_ULE)
       // Always true.
-      return Constant::getAllOnesValue(ITy);
+      return getTrue(ITy);
     if (Pred == CmpInst::ICMP_UGT)
       // Always false.
-      return Constant::getNullValue(ITy);
+      return getFalse(ITy);
   }
 
   // If the comparison is with the result of a select instruction, check whether
@@ -2230,8 +2244,7 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
 
   if (isa<UndefValue>(Ops[0])) {
     // Compute the (pointer) type returned by the GEP instruction.
-    Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.data() + 1,
-                                                       Ops.size() - 1);
+    Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
     Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
     return UndefValue::get(GEPTy);
   }
@@ -2254,9 +2267,37 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
     if (!isa<Constant>(Ops[i]))
       return 0;
 
-  return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]),
-                                        (Constant *const*)Ops.data() + 1,
-                                        Ops.size() - 1);
+  return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
+}
+
+/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
+/// can fold the result.  If not, this returns null.
+Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+                                     ArrayRef<unsigned> Idxs,
+                                     const TargetData *,
+                                     const DominatorTree *) {
+  if (Constant *CAgg = dyn_cast<Constant>(Agg))
+    if (Constant *CVal = dyn_cast<Constant>(Val))
+      return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
+
+  // insertvalue x, undef, n -> x
+  if (match(Val, m_Undef()))
+    return Agg;
+
+  // insertvalue x, (extractvalue y, n), n
+  if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val))
+    if (EV->getAggregateOperand()->getType() == Agg->getType() &&
+        EV->getIndices() == Idxs) {
+      // insertvalue undef, (extractvalue y, n), n -> y
+      if (match(Agg, m_Undef()))
+        return EV->getAggregateOperand();
+
+      // insertvalue y, (extractvalue y, n), n -> y
+      if (Agg == EV->getAggregateOperand())
+        return Agg;
+    }
+
+  return 0;
 }
 
 /// SimplifyPHINode - See if we can fold the given phi.  If not, returns null.
@@ -2460,6 +2501,13 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
     Result = SimplifyGEPInst(Ops, TD, DT);
     break;
   }
+  case Instruction::InsertValue: {
+    InsertValueInst *IV = cast<InsertValueInst>(I);
+    Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
+                                     IV->getInsertedValueOperand(),
+                                     IV->getIndices(), TD, DT);
+    break;
+  }
   case Instruction::PHI:
     Result = SimplifyPHINode(cast<PHINode>(I), DT);
     break;
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 18f3a3465b..0e6bcbfae4 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -188,12 +188,16 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
     
     --ScanFrom;
     // If this is a load of Ptr, the loaded value is available.
+    // (This is true even if the load is volatile or atomic, although
+    // those cases are unlikely.)
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
       if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
         return LI;
     
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       // If this is a store through Ptr, the value is available!
+      // (This is true even if the store is volatile or atomic, although
+      // those cases are unlikely.)
       if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
         return SI->getOperand(0);
       
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index c1afe8fbd6..3997ac478b 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -76,7 +76,13 @@ static void GetMemRefInstrs(const Loop *L,
 }
 
 static bool IsLoadOrStoreInst(Value *I) {
-  return isa<LoadInst>(I) || isa<StoreInst>(I);
+  // Returns true if the load or store can be analyzed. Atomic and volatile
+  // operations have properties which this analysis does not understand.
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->isUnordered();
+  else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->isUnordered();
+  return false;
 }
 
 static Value *GetPointerOperand(Value *I) {
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 05831402f4..85aaccaefc 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
@@ -55,12 +56,12 @@ bool Loop::isLoopInvariant(Value *V) const {
 }
 
 /// hasLoopInvariantOperands - Return true if all the operands of the
-/// specified instruction are loop invariant. 
+/// specified instruction are loop invariant.
 bool Loop::hasLoopInvariantOperands(Instruction *I) const {
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (!isLoopInvariant(I->getOperand(i)))
       return false;
-  
+
   return true;
 }
 
@@ -98,6 +99,9 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
     return false;
   if (I->mayReadFromMemory())
     return false;
+  // The landingpad instruction is immobile.
+  if (isa<LandingPadInst>(I))
+    return false;
   // Determine the insertion point, unless one was given.
   if (!InsertPt) {
     BasicBlock *Preheader = getLoopPreheader();
@@ -110,7 +114,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
     if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
       return false;
-  
+
   // Hoist.
   I->moveBefore(InsertPt);
   Changed = true;
@@ -383,6 +387,205 @@ void Loop::dump() const {
 }
 
 //===----------------------------------------------------------------------===//
+// UnloopUpdater implementation
+//
+
+namespace {
+/// Find the new parent loop for all blocks within the "unloop" whose last
+/// backedges has just been removed.
+class UnloopUpdater {
+  Loop *Unloop;
+  LoopInfo *LI;
+
+  LoopBlocksDFS DFS;
+
+  // Map unloop's immediate subloops to their nearest reachable parents. Nested
+  // loops within these subloops will not change parents. However, an immediate
+  // subloop's new parent will be the nearest loop reachable from either its own
+  // exits *or* any of its nested loop's exits.
+  DenseMap<Loop*, Loop*> SubloopParents;
+
+  // Flag the presence of an irreducible backedge whose destination is a block
+  // directly contained by the original unloop.
+  bool FoundIB;
+
+public:
+  UnloopUpdater(Loop *UL, LoopInfo *LInfo) :
+    Unloop(UL), LI(LInfo), DFS(UL), FoundIB(false) {}
+
+  void updateBlockParents();
+
+  void removeBlocksFromAncestors();
+
+  void updateSubloopParents();
+
+protected:
+  Loop *getNearestLoop(BasicBlock *BB, Loop *BBLoop);
+};
+} // end anonymous namespace
+
+/// updateBlockParents - Update the parent loop for all blocks that are directly
+/// contained within the original "unloop".
+void UnloopUpdater::updateBlockParents() {
+  if (Unloop->getNumBlocks()) {
+    // Perform a post order CFG traversal of all blocks within this loop,
+    // propagating the nearest loop from sucessors to predecessors.
+    LoopBlocksTraversal Traversal(DFS, LI);
+    for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
+           POE = Traversal.end(); POI != POE; ++POI) {
+
+      Loop *L = LI->getLoopFor(*POI);
+      Loop *NL = getNearestLoop(*POI, L);
+
+      if (NL != L) {
+        // For reducible loops, NL is now an ancestor of Unloop.
+        assert((NL != Unloop && (!NL || NL->contains(Unloop))) &&
+               "uninitialized successor");
+        LI->changeLoopFor(*POI, NL);
+      }
+      else {
+        // Or the current block is part of a subloop, in which case its parent
+        // is unchanged.
+        assert((FoundIB || Unloop->contains(L)) && "uninitialized successor");
+      }
+    }
+  }
+  // Each irreducible loop within the unloop induces a round of iteration using
+  // the DFS result cached by Traversal.
+  bool Changed = FoundIB;
+  for (unsigned NIters = 0; Changed; ++NIters) {
+    assert(NIters < Unloop->getNumBlocks() && "runaway iterative algorithm");
+
+    // Iterate over the postorder list of blocks, propagating the nearest loop
+    // from successors to predecessors as before.
+    Changed = false;
+    for (LoopBlocksDFS::POIterator POI = DFS.beginPostorder(),
+           POE = DFS.endPostorder(); POI != POE; ++POI) {
+
+      Loop *L = LI->getLoopFor(*POI);
+      Loop *NL = getNearestLoop(*POI, L);
+      if (NL != L) {
+        assert(NL != Unloop && (!NL || NL->contains(Unloop)) &&
+               "uninitialized successor");
+        LI->changeLoopFor(*POI, NL);
+        Changed = true;
+      }
+    }
+  }
+}
+
+/// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below
+/// their new parents.
+void UnloopUpdater::removeBlocksFromAncestors() {
+  // Remove unloop's blocks from all ancestors below their new parents.
+  for (Loop::block_iterator BI = Unloop->block_begin(),
+         BE = Unloop->block_end(); BI != BE; ++BI) {
+    Loop *NewParent = LI->getLoopFor(*BI);
+    // If this block is an immediate subloop, remove all blocks (including
+    // nested subloops) from ancestors below the new parent loop.
+    // Otherwise, if this block is in a nested subloop, skip it.
+    if (SubloopParents.count(NewParent))
+      NewParent = SubloopParents[NewParent];
+    else if (Unloop->contains(NewParent))
+      continue;
+
+    // Remove blocks from former Ancestors except Unloop itself which will be
+    // deleted.
+    for (Loop *OldParent = Unloop->getParentLoop(); OldParent != NewParent;
+         OldParent = OldParent->getParentLoop()) {
+      assert(OldParent && "new loop is not an ancestor of the original");
+      OldParent->removeBlockFromLoop(*BI);
+    }
+  }
+}
+
+/// updateSubloopParents - Update the parent loop for all subloops directly
+/// nested within unloop.
+void UnloopUpdater::updateSubloopParents() {
+  while (!Unloop->empty()) {
+    Loop *Subloop = *llvm::prior(Unloop->end());
+    Unloop->removeChildLoop(llvm::prior(Unloop->end()));
+
+    assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop");
+    if (SubloopParents[Subloop])
+      SubloopParents[Subloop]->addChildLoop(Subloop);
+    else
+      LI->addTopLevelLoop(Subloop);
+  }
+}
+
+/// getNearestLoop - Return the nearest parent loop among this block's
+/// successors. If a successor is a subloop header, consider its parent to be
+/// the nearest parent of the subloop's exits.
+///
+/// For subloop blocks, simply update SubloopParents and return NULL.
+Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
+
+  // Initially for blocks directly contained by Unloop, NearLoop == Unloop and
+  // is considered uninitialized.
+  Loop *NearLoop = BBLoop;
+
+  Loop *Subloop = 0;
+  if (NearLoop != Unloop && Unloop->contains(NearLoop)) {
+    Subloop = NearLoop;
+    // Find the subloop ancestor that is directly contained within Unloop.
+    while (Subloop->getParentLoop() != Unloop) {
+      Subloop = Subloop->getParentLoop();
+      assert(Subloop && "subloop is not an ancestor of the original loop");
+    }
+    // Get the current nearest parent of the Subloop exits, initially Unloop.
+    if (!SubloopParents.count(Subloop))
+      SubloopParents[Subloop] = Unloop;
+    NearLoop = SubloopParents[Subloop];
+  }
+
+  succ_iterator I = succ_begin(BB), E = succ_end(BB);
+  if (I == E) {
+    assert(!Subloop && "subloop blocks must have a successor");
+    NearLoop = 0; // unloop blocks may now exit the function.
+  }
+  for (; I != E; ++I) {
+    if (*I == BB)
+      continue; // self loops are uninteresting
+
+    Loop *L = LI->getLoopFor(*I);
+    if (L == Unloop) {
+      // This successor has not been processed. This path must lead to an
+      // irreducible backedge.
+      assert((FoundIB || !DFS.hasPostorder(*I)) && "should have seen IB");
+      FoundIB = true;
+    }
+    if (L != Unloop && Unloop->contains(L)) {
+      // Successor is in a subloop.
+      if (Subloop)
+        continue; // Branching within subloops. Ignore it.
+
+      // BB branches from the original into a subloop header.
+      assert(L->getParentLoop() == Unloop && "cannot skip into nested loops");
+
+      // Get the current nearest parent of the Subloop's exits.
+      L = SubloopParents[L];
+      // L could be Unloop if the only exit was an irreducible backedge.
+    }
+    if (L == Unloop) {
+      continue;
+    }
+    // Handle critical edges from Unloop into a sibling loop.
+    if (L && !L->contains(Unloop)) {
+      L = L->getParentLoop();
+    }
+    // Remember the nearest parent loop among successors or subloop exits.
+    if (NearLoop == Unloop || !NearLoop || NearLoop->contains(L))
+      NearLoop = L;
+  }
+  if (Subloop) {
+    SubloopParents[Subloop] = NearLoop;
+    return BBLoop;
+  }
+  return NearLoop;
+}
+
+//===----------------------------------------------------------------------===//
 // LoopInfo implementation
 //
 bool LoopInfo::runOnFunction(Function &) {
@@ -391,6 +594,68 @@ bool LoopInfo::runOnFunction(Function &) {
   return false;
 }
 
+/// updateUnloop - The last backedge has been removed from a loop--now the
+/// "unloop". Find a new parent for the blocks contained within unloop and
+/// update the loop tree. We don't necessarily have valid dominators at this
+/// point, but LoopInfo is still valid except for the removal of this loop.
+///
+/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without
+/// checking first is illegal.
+void LoopInfo::updateUnloop(Loop *Unloop) {
+
+  // First handle the special case of no parent loop to simplify the algorithm.
+  if (!Unloop->getParentLoop()) {
+    // Since BBLoop had no parent, Unloop blocks are no longer in a loop.
+    for (Loop::block_iterator I = Unloop->block_begin(),
+         E = Unloop->block_end(); I != E; ++I) {
+
+      // Don't reparent blocks in subloops.
+      if (getLoopFor(*I) != Unloop)
+        continue;
+
+      // Blocks no longer have a parent but are still referenced by Unloop until
+      // the Unloop object is deleted.
+      LI.changeLoopFor(*I, 0);
+    }
+
+    // Remove the loop from the top-level LoopInfo object.
+    for (LoopInfo::iterator I = LI.begin();; ++I) {
+      assert(I != LI.end() && "Couldn't find loop");
+      if (*I == Unloop) {
+        LI.removeLoop(I);
+        break;
+      }
+    }
+
+    // Move all of the subloops to the top-level.
+    while (!Unloop->empty())
+      LI.addTopLevelLoop(Unloop->removeChildLoop(llvm::prior(Unloop->end())));
+
+    return;
+  }
+
+  // Update the parent loop for all blocks within the loop. Blocks within
+  // subloops will not change parents.
+  UnloopUpdater Updater(Unloop, this);
+  Updater.updateBlockParents();
+
+  // Remove blocks from former ancestor loops.
+  Updater.removeBlocksFromAncestors();
+
+  // Add direct subloops as children in their new parent loop.
+  Updater.updateSubloopParents();
+
+  // Remove unloop from its parent loop.
+  Loop *ParentLoop = Unloop->getParentLoop();
+  for (Loop::iterator I = ParentLoop->begin();; ++I) {
+    assert(I != ParentLoop->end() && "Couldn't find loop");
+    if (*I == Unloop) {
+      ParentLoop->removeChildLoop(I);
+      break;
+    }
+  }
+}
+
 void LoopInfo::verifyAnalysis() const {
   // LoopInfo is a FunctionPass, but verifying every loop in the function
   // each time verifyAnalysis is called is very expensive. The
@@ -400,12 +665,21 @@ void LoopInfo::verifyAnalysis() const {
 
   if (!VerifyLoopInfo) return;
 
+  DenseSet<const Loop*> Loops;
   for (iterator I = begin(), E = end(); I != E; ++I) {
     assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
-    (*I)->verifyLoopNest();
+    (*I)->verifyLoopNest(&Loops);
   }
 
-  // TODO: check BBMap consistency.
+  // Verify that blocks are mapped to valid loops.
+  //
+  // FIXME: With an up-to-date DFS (see LoopIterator.h) and DominatorTree, we
+  // could also verify that the blocks are still in the correct loops.
+  for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(),
+         E = LI.BBMap.end(); I != E; ++I) {
+    assert(Loops.count(I->second) && "orphaned loop");
+    assert(I->second->contains(I->first) && "orphaned block");
+  }
 }
 
 void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -417,3 +691,15 @@ void LoopInfo::print(raw_ostream &OS, const Module*) const {
   LI.print(OS);
 }
 
+//===----------------------------------------------------------------------===//
+// LoopBlocksDFS implementation
+//
+
+/// Traverse the loop blocks and store the DFS result.
+/// Useful for clients that just want the final DFS result and don't need to
+/// visit blocks during the initial traversal.
+void LoopBlocksDFS::perform(LoopInfo *LI) {
+  LoopBlocksTraversal Traversal(*this, LI);
+  for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(),
+         POE = Traversal.end(); POI != POE; ++POI) ;
+}
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 10e3f297f9..5ba1f4045d 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -59,9 +59,9 @@ char PrintLoopPass::ID = 0;
 static DebugInfoProbeInfo *TheDebugProbe;
 static void createDebugInfoProbe() {
   if (TheDebugProbe) return;
-      
-  // Constructed the first time this is called. This guarantees that the 
-  // object will be constructed, if -enable-debug-info-probe is set, 
+
+  // Constructed the first time this is called. This guarantees that the
+  // object will be constructed, if -enable-debug-info-probe is set,
   // before static globals, thus it will be destroyed before them.
   static ManagedStatic<DebugInfoProbeInfo> DIP;
   TheDebugProbe = &*DIP;
@@ -73,73 +73,29 @@ static void createDebugInfoProbe() {
 
 char LPPassManager::ID = 0;
 
-LPPassManager::LPPassManager(int Depth) 
-  : FunctionPass(ID), PMDataManager(Depth) { 
+LPPassManager::LPPassManager()
+  : FunctionPass(ID), PMDataManager() {
   skipThisLoop = false;
   redoThisLoop = false;
   LI = NULL;
   CurrentLoop = NULL;
 }
 
-/// Delete loop from the loop queue and loop hierarchy (LoopInfo). 
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
 void LPPassManager::deleteLoopFromQueue(Loop *L) {
 
-  if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop.
-    // Reparent all of the blocks in this loop.  Since BBLoop had a parent,
-    // they are now all in it.
-    for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 
-         I != E; ++I)
-      if (LI->getLoopFor(*I) == L)    // Don't change blocks in subloops.
-        LI->changeLoopFor(*I, ParentLoop);
-    
-    // Remove the loop from its parent loop.
-    for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();;
-         ++I) {
-      assert(I != E && "Couldn't find loop");
-      if (*I == L) {
-        ParentLoop->removeChildLoop(I);
-        break;
-      }
-    }
-    
-    // Move all subloops into the parent loop.
-    while (!L->empty())
-      ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1));
-  } else {
-    // Reparent all of the blocks in this loop.  Since BBLoop had no parent,
-    // they no longer in a loop at all.
-    
-    for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
-      // Don't change blocks in subloops.
-      if (LI->getLoopFor(L->getBlocks()[i]) == L) {
-        LI->removeBlock(L->getBlocks()[i]);
-        --i;
-      }
-    }
-
-    // Remove the loop from the top-level LoopInfo object.
-    for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) {
-      assert(I != E && "Couldn't find loop");
-      if (*I == L) {
-        LI->removeLoop(I);
-        break;
-      }
-    }
-
-    // Move all of the subloops to the top-level.
-    while (!L->empty())
-      LI->addTopLevelLoop(L->removeChildLoop(L->end()-1));
-  }
-
-  delete L;
+  LI->updateUnloop(L);
 
   // If L is current loop then skip rest of the passes and let
   // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
   // and continue applying other passes on CurrentLoop.
-  if (CurrentLoop == L) {
+  if (CurrentLoop == L)
     skipThisLoop = true;
+
+  delete L;
+
+  if (skipThisLoop)
     return;
-  }
 
   for (std::deque<Loop *>::iterator I = LQ.begin(),
          E = LQ.end(); I != E; ++I) {
@@ -166,10 +122,10 @@ void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
 
 void LPPassManager::insertLoopIntoQueue(Loop *L) {
   // Insert L into loop queue
-  if (L == CurrentLoop) 
+  if (L == CurrentLoop)
     redoLoop(L);
   else if (!L->getParentLoop())
-    // This is top level loop. 
+    // This is top level loop.
     LQ.push_front(L);
   else {
     // Insert L after the parent loop.
@@ -195,9 +151,9 @@ void LPPassManager::redoLoop(Loop *L) {
 
 /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
 /// all loop passes.
-void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, 
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
                                                   BasicBlock *To, Loop *L) {
-  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {  
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     LoopPass *LP = getContainedPass(Index);
     LP->cloneBasicBlockAnalysis(From, To, L);
   }
@@ -206,13 +162,13 @@ void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
 /// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
 void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
   if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
-    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; 
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
          ++BI) {
       Instruction &I = *BI;
       deleteSimpleAnalysisValue(&I, L);
     }
   }
-  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {  
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     LoopPass *LP = getContainedPass(Index);
     LP->deleteAnalysisValue(V, L);
   }
@@ -228,7 +184,7 @@ static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
 
 /// Pass Manager itself does not invalidate any analysis info.
 void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
-  // LPPassManager needs LoopInfo. In the long term LoopInfo class will 
+  // LPPassManager needs LoopInfo. In the long term LoopInfo class will
   // become part of LPPassManager.
   Info.addRequired<LoopInfo>();
   Info.setPreservesAll();
@@ -255,7 +211,7 @@ bool LPPassManager::runOnFunction(Function &F) {
   for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
        I != E; ++I) {
     Loop *L = *I;
-    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {  
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       LoopPass *P = getContainedPass(Index);
       Changed |= P->doInitialization(L, *this);
     }
@@ -263,13 +219,13 @@ bool LPPassManager::runOnFunction(Function &F) {
 
   // Walk Loops
   while (!LQ.empty()) {
-      
+
     CurrentLoop  = LQ.back();
     skipThisLoop = false;
     redoThisLoop = false;
 
     // Run all passes on the current Loop.
-    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {  
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       LoopPass *P = getContainedPass(Index);
       dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
                    CurrentLoop->getHeader()->getName());
@@ -319,23 +275,23 @@ bool LPPassManager::runOnFunction(Function &F) {
         // Do not run other passes on this loop.
         break;
     }
-    
+
     // If the loop was deleted, release all the loop passes. This frees up
     // some memory, and avoids trouble with the pass manager trying to call
     // verifyAnalysis on them.
     if (skipThisLoop)
-      for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {  
+      for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
         Pass *P = getContainedPass(Index);
         freePass(P, "<deleted>", ON_LOOP_MSG);
       }
 
     // Pop the loop from queue after running all passes.
     LQ.pop_back();
-    
+
     if (redoThisLoop)
       LQ.push_back(CurrentLoop);
   }
-  
+
   // Finalization
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     LoopPass *P = getContainedPass(Index);
@@ -372,7 +328,7 @@ Pass *LoopPass::createPrinterPass(raw_ostream &O,
 // LPPassManger as expected.
 void LoopPass::preparePassManager(PMStack &PMS) {
 
-  // Find LPPassManager 
+  // Find LPPassManager
   while (!PMS.empty() &&
          PMS.top()->getPassManagerType() > PMT_LoopPassManager)
     PMS.pop();
@@ -381,14 +337,14 @@ void LoopPass::preparePassManager(PMStack &PMS) {
   // by other passes that are managed by LPM then do not insert
   // this pass in current LPM. Use new LPPassManager.
   if (PMS.top()->getPassManagerType() == PMT_LoopPassManager &&
-      !PMS.top()->preserveHigherLevelAnalysis(this)) 
+      !PMS.top()->preserveHigherLevelAnalysis(this))
     PMS.pop();
 }
 
 /// Assign pass manager to manage this pass.
 void LoopPass::assignPassManager(PMStack &PMS,
                                  PassManagerType PreferredType) {
-  // Find LPPassManager 
+  // Find LPPassManager
   while (!PMS.empty() &&
          PMS.top()->getPassManagerType() > PMT_LoopPassManager)
     PMS.pop();
@@ -397,12 +353,12 @@ void LoopPass::assignPassManager(PMStack &PMS,
   if (PMS.top()->getPassManagerType() == PMT_LoopPassManager)
     LPPM = (LPPassManager*)PMS.top();
   else {
-    // Create new Loop Pass Manager if it does not exist. 
+    // Create new Loop Pass Manager if it does not exist.
     assert (!PMS.empty() && "Unable to create Loop Pass Manager");
     PMDataManager *PMD = PMS.top();
 
-    // [1] Create new Call Graph Pass Manager
-    LPPM = new LPPassManager(PMD->getDepth() + 1);
+    // [1] Create new Loop Pass Manager
+    LPPM = new LPPassManager();
     LPPM->populateInheritedAnalysis(PMS);
 
     // [2] Set up new manager's top level manager
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 2283db0bc4..fde07ea4f9 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -25,8 +25,17 @@ namespace {
   struct MemDepPrinter : public FunctionPass {
     const Function *F;
 
-    typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag;
-    typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep;
+    enum DepType {
+      Clobber = 0,
+      Def,
+      NonFuncLocal,
+      Unknown
+    };
+
+    static const char* DepTypeStr[];
+
+    typedef PointerIntPair<const Instruction *, 2, DepType> InstTypePair;
+    typedef std::pair<InstTypePair, const BasicBlock *> Dep;
     typedef SmallSetVector<Dep, 4> DepSet;
     typedef DenseMap<const Instruction *, DepSet> DepSetMap;
     DepSetMap Deps;
@@ -50,6 +59,21 @@ namespace {
       Deps.clear();
       F = 0;
     }
+
+  private:
+    static InstTypePair getInstTypePair(MemDepResult dep) {
+      if (dep.isClobber())
+        return InstTypePair(dep.getInst(), Clobber);
+      if (dep.isDef())
+        return InstTypePair(dep.getInst(), Def);
+      if (dep.isNonFuncLocal())
+        return InstTypePair(dep.getInst(), NonFuncLocal);
+      assert(dep.isUnknown() && "unexptected dependence type");
+      return InstTypePair(dep.getInst(), Unknown);
+    }
+    static InstTypePair getInstTypePair(const Instruction* inst, DepType type) {
+      return InstTypePair(inst, type);
+    }
   };
 }
 
@@ -64,6 +88,9 @@ FunctionPass *llvm::createMemDepPrinter() {
   return new MemDepPrinter();
 }
 
+const char* MemDepPrinter::DepTypeStr[]
+  = {"Clobber", "Def", "NonFuncLocal", "Unknown"};
+
 bool MemDepPrinter::runOnFunction(Function &F) {
   this->F = &F;
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
@@ -79,10 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
 
     MemDepResult Res = MDA.getDependency(Inst);
     if (!Res.isNonLocal()) {
-      assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
-              "Local dep should be unknown, def or clobber!");
-      Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
-                                                          Res.isClobber()),
+      Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
                                        static_cast<BasicBlock *>(0)));
     } else if (CallSite CS = cast<Value>(Inst)) {
       const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
@@ -92,22 +116,26 @@ bool MemDepPrinter::runOnFunction(Function &F) {
       for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
            I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
         const MemDepResult &Res = I->getResult();
-        assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
-                "Resolved non-local call dep should be unknown, def or "
-                "clobber!");
-        InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
-                                                          Res.isClobber()),
-                                       I->getBB()));
+        InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB()));
       }
     } else {
       SmallVector<NonLocalDepResult, 4> NLDI;
       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-        // FIXME: Volatile is not handled properly here.
+        if (!LI->isUnordered()) {
+          // FIXME: Handle atomic/volatile loads.
+          Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
+                                           static_cast<BasicBlock *>(0)));
+          continue;
+        }
         AliasAnalysis::Location Loc = AA.getLocation(LI);
-        MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(),
-                                         LI->getParent(), NLDI);
+        MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI);
       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-        // FIXME: Volatile is not handled properly here.
+        if (!LI->isUnordered()) {
+          // FIXME: Handle atomic/volatile stores.
+          Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
+                                           static_cast<BasicBlock *>(0)));
+          continue;
+        }
         AliasAnalysis::Location Loc = AA.getLocation(SI);
         MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
       } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
@@ -121,11 +149,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
       for (SmallVectorImpl<NonLocalDepResult>::const_iterator
            I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
         const MemDepResult &Res = I->getResult();
-        assert(Res.isClobber() != Res.isDef() &&
-               "Resolved non-local pointer dep should be def or clobber!");
-        InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
-                                                          Res.isClobber()),
-                                       I->getBB()));
+        InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB()));
       }
     }
   }
@@ -146,26 +170,18 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
     for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
          I != E; ++I) {
       const Instruction *DepInst = I->first.getPointer();
-      bool isClobber = I->first.getInt();
+      DepType type = I->first.getInt();
       const BasicBlock *DepBB = I->second;
 
       OS << "    ";
-      if (!DepInst)
-        OS << "Unknown";
-      else if (isClobber)
-        OS << "Clobber";
-      else
-        OS << "    Def";
+      OS << DepTypeStr[type];
       if (DepBB) {
         OS << " in block ";
         WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
       }
       if (DepInst) {
         OS << " from: ";
-        if (DepInst == Inst)
-          OS << "<unspecified>";
-        else
-          DepInst->print(OS);
+        DepInst->print(OS);
       }
       OS << "\n";
     }
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 34ba92509e..92967c08dc 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -120,21 +120,27 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
                                         AliasAnalysis::Location &Loc,
                                         AliasAnalysis *AA) {
   if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-    if (LI->isVolatile()) {
-      Loc = AliasAnalysis::Location();
+    if (LI->isUnordered()) {
+      Loc = AA->getLocation(LI);
+      return AliasAnalysis::Ref;
+    } else if (LI->getOrdering() == Monotonic) {
+      Loc = AA->getLocation(LI);
       return AliasAnalysis::ModRef;
     }
-    Loc = AA->getLocation(LI);
-    return AliasAnalysis::Ref;
+    Loc = AliasAnalysis::Location();
+    return AliasAnalysis::ModRef;
   }
 
   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-    if (SI->isVolatile()) {
-      Loc = AliasAnalysis::Location();
+    if (SI->isUnordered()) {
+      Loc = AA->getLocation(SI);
+      return AliasAnalysis::Mod;
+    } else if (SI->getOrdering() == Monotonic) {
+      Loc = AA->getLocation(SI);
       return AliasAnalysis::ModRef;
     }
-    Loc = AA->getLocation(SI);
-    return AliasAnalysis::Mod;
+    Loc = AliasAnalysis::Location();
+    return AliasAnalysis::ModRef;
   }
 
   if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
@@ -232,7 +238,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
   // unknown, otherwise it is non-local.
   if (BB != &BB->getParent()->getEntryBlock())
     return MemDepResult::getNonLocal();
-  return MemDepResult::getUnknown();
+  return MemDepResult::getNonFuncLocal();
 }
 
 /// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
@@ -270,8 +276,8 @@ unsigned MemoryDependenceAnalysis::
 getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
                                 unsigned MemLocSize, const LoadInst *LI,
                                 const TargetData &TD) {
-  // We can only extend non-volatile integer loads.
-  if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0;
+  // We can only extend simple integer loads.
+  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
   
   // Get the base of this load.
   int64_t LIOffs = 0;
@@ -369,6 +375,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
     // Values depend on loads if the pointers are must aliased.  This means that
     // a load depends on another must aliased load from the same value.
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+      // Atomic loads have complications involved.
+      // FIXME: This is overly conservative.
+      if (!LI->isUnordered())
+        return MemDepResult::getClobber(LI);
+
       AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
       
       // If we found a pointer, check if it could be the same as our pointer.
@@ -424,6 +435,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
     }
     
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+      // Atomic stores have complications involved.
+      // FIXME: This is overly conservative.
+      if (!SI->isUnordered())
+        return MemDepResult::getClobber(SI);
+
       // If alias analysis can tell that this store is guaranteed to not modify
       // the query pointer, ignore it.  Use getModRefInfo to handle cases where
       // the query pointer points to constant memory etc.
@@ -483,7 +499,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
   // unknown, otherwise it is non-local.
   if (BB != &BB->getParent()->getEntryBlock())
     return MemDepResult::getNonLocal();
-  return MemDepResult::getUnknown();
+  return MemDepResult::getNonFuncLocal();
 }
 
 /// getDependency - Return the instruction on which a memory operation
@@ -516,7 +532,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
     if (QueryParent != &QueryParent->getParent()->getEntryBlock())
       LocalCache = MemDepResult::getNonLocal();
     else
-      LocalCache = MemDepResult::getUnknown();
+      LocalCache = MemDepResult::getNonFuncLocal();
   } else {
     AliasAnalysis::Location MemLoc;
     AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
@@ -672,7 +688,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
       // a clobber, otherwise it is unknown.
       Dep = MemDepResult::getNonLocal();
     } else {
-      Dep = MemDepResult::getUnknown();
+      Dep = MemDepResult::getNonFuncLocal();
     }
     
     // If we had a dirty entry for the block, update it.  Otherwise, just add
@@ -790,7 +806,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
   // If the block has a dependency (i.e. it isn't completely transparent to
   // the value), remember the reverse association because we just added it
   // to Cache!
-  if (Dep.isNonLocal() || Dep.isUnknown())
+  if (!Dep.isDef() && !Dep.isClobber())
     return Dep;
   
   // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 05476115bc..7e22ddc61c 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -407,9 +407,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
     }
 
     GetElementPtrInst *Result =
-    GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
-                              InVal->getName()+".phi.trans.insert",
-                              PredBB->getTerminator());
+      GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1),
+                                InVal->getName()+".phi.trans.insert",
+                                PredBB->getTerminator());
     Result->setIsInBounds(GEP->isInBounds());
     NewInsts.push_back(Result);
     return Result;
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
index 7c584daef7..0e3b6e69ce 100644
--- a/lib/Analysis/PathNumbering.cpp
+++ b/lib/Analysis/PathNumbering.cpp
@@ -387,7 +387,7 @@ void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
 
     TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
     if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
-       || isa<UnwindInst>(terminator))
+       || isa<ResumeInst>(terminator) || isa<UnwindInst>(terminator))
       addEdge(currentNode, getExit(),0);
 
     currentNode->setColor(BallLarusNode::GRAY);
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 80eda79d8a..3a3529baf9 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -27,8 +27,8 @@ using namespace llvm;
 
 char RGPassManager::ID = 0;
 
-RGPassManager::RGPassManager(int Depth)
-  : FunctionPass(ID), PMDataManager(Depth) {
+RGPassManager::RGPassManager()
+  : FunctionPass(ID), PMDataManager() {
   skipThisRegion = false;
   redoThisRegion = false;
   RI = NULL;
@@ -250,7 +250,7 @@ void RegionPass::assignPassManager(PMStack &PMS,
     PMDataManager *PMD = PMS.top();
 
     // [1] Create new Region Pass Manager
-    RGPM = new RGPassManager(PMD->getDepth() + 1);
+    RGPM = new RGPassManager();
     RGPM->populateInheritedAnalysis(PMS);
 
     // [2] Set up new manager's top level manager
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 7f4d3ba479..e0ac56c65e 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -652,7 +652,7 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
 /// Assume, K > 0.
 static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                        ScalarEvolution &SE,
-                                       Type* ResultTy) {
+                                       Type *ResultTy) {
   // Handle the simplest case efficiently.
   if (K == 1)
     return SE.getTruncateOrZeroExtend(It, ResultTy);
@@ -1070,14 +1070,26 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
 
   // Check for a simple looking step prior to loop entry.
   const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
-  if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step)
+  if (!SA)
+    return 0;
+
+  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+  // subtraction is expensive. For this purpose, perform a quick and dirty
+  // difference, by checking for Step in the operand list.
+  SmallVector<const SCEV *, 4> DiffOps;
+  for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
+       I != E; ++I) {
+    if (*I != Step)
+      DiffOps.push_back(*I);
+  }
+  if (DiffOps.size() == SA->getNumOperands())
     return 0;
 
   // This is a postinc AR. Check for overflow on the preinc recurrence using the
   // same three conditions that getSignExtendedExpr checks.
 
   // 1. NSW flags on the step increment.
-  const SCEV *PreStart = SA->getOperand(1);
+  const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
   const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
     SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
 
@@ -1735,7 +1747,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
       // If all of the other operands were loop invariant, we are done.
       if (Ops.size() == 1) return NewRec;
 
-      // Otherwise, add the folded AddRec by the non-liv parts.
+      // Otherwise, add the folded AddRec by the non-invariant parts.
       for (unsigned i = 0;; ++i)
         if (Ops[i] == AddRec) {
           Ops[i] = NewRec;
@@ -1800,6 +1812,38 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   return S;
 }
 
+static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
+  uint64_t k = i*j;
+  if (j > 1 && k / j != i) Overflow = true;
+  return k;
+}
+
+/// Compute the result of "n choose k", the binomial coefficient.  If an
+/// intermediate computation overflows, Overflow will be set and the return will
+/// be garbage. Overflow is not cleared on absense of overflow.
+static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
+  // We use the multiplicative formula:
+  //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
+  // At each iteration, we take the n-th term of the numeral and divide by the
+  // (k-n)th term of the denominator.  This division will always produce an
+  // integral result, and helps reduce the chance of overflow in the
+  // intermediate computations. However, we can still overflow even when the
+  // final result would fit.
+
+  if (n == 0 || n == k) return 1;
+  if (k > n) return 0;
+
+  if (k > n/2)
+    k = n-k;
+
+  uint64_t r = 1;
+  for (uint64_t i = 1; i <= k; ++i) {
+    r = umul_ov(r, n-(i-1), Overflow);
+    r /= i;
+  }
+  return r;
+}
+
 /// getMulExpr - Get a canonical multiply expression, or something simpler if
 /// possible.
 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
@@ -1960,7 +2004,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
       // If all of the other operands were loop invariant, we are done.
       if (Ops.size() == 1) return NewRec;
 
-      // Otherwise, multiply the folded AddRec by the non-liv parts.
+      // Otherwise, multiply the folded AddRec by the non-invariant parts.
       for (unsigned i = 0;; ++i)
         if (Ops[i] == AddRec) {
           Ops[i] = NewRec;
@@ -1974,31 +2018,65 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
     // multiplied together.  If so, we can fold them.
     for (unsigned OtherIdx = Idx+1;
          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-         ++OtherIdx)
+         ++OtherIdx) {
       if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
-        // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L>  -->
-        // {A*C,+,F*D + G*B + B*D}<L>
+        // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+        // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+        //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
+        //   ]]],+,...up to x=2n}.
+        // Note that the arguments to choose() are always integers with values
+        // known at compile time, never SCEV objects.
+        //
+        // The implementation avoids pointless extra computations when the two
+        // addrec's are of different length (mathematically, it's equivalent to
+        // an infinite stream of zeros on the right).
+        bool OpsModified = false;
         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
              ++OtherIdx)
           if (const SCEVAddRecExpr *OtherAddRec =
                 dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
             if (OtherAddRec->getLoop() == AddRecLoop) {
-              const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
-              const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
-              const SCEV *B = F->getStepRecurrence(*this);
-              const SCEV *D = G->getStepRecurrence(*this);
-              const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
-                                               getMulExpr(G, B),
-                                               getMulExpr(B, D));
-              const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
-                                                    F->getLoop(),
-                                                    SCEV::FlagAnyWrap);
-              if (Ops.size() == 2) return NewAddRec;
-              Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
-              Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+              bool Overflow = false;
+              Type *Ty = AddRec->getType();
+              bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+              SmallVector<const SCEV*, 7> AddRecOps;
+              for (int x = 0, xe = AddRec->getNumOperands() +
+                     OtherAddRec->getNumOperands() - 1;
+                   x != xe && !Overflow; ++x) {
+                const SCEV *Term = getConstant(Ty, 0);
+                for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+                  uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+                  for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+                         ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+                       z < ze && !Overflow; ++z) {
+                    uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+                    uint64_t Coeff;
+                    if (LargerThan64Bits)
+                      Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+                    else
+                      Coeff = Coeff1*Coeff2;
+                    const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+                    const SCEV *Term1 = AddRec->getOperand(y-z);
+                    const SCEV *Term2 = OtherAddRec->getOperand(z);
+                    Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
+                  }
+                }
+                AddRecOps.push_back(Term);
+              }
+              if (!Overflow) {
+                const SCEV *NewAddRec = getAddRecExpr(AddRecOps,
+                                                      AddRec->getLoop(),
+                                                      SCEV::FlagAnyWrap);
+                if (Ops.size() == 2) return NewAddRec;
+                Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
+                Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+                OpsModified = true;
+              }
             }
-        return getMulExpr(Ops);
+        if (OpsModified)
+          return getMulExpr(Ops);
       }
+    }
 
     // Otherwise couldn't fold anything into this recurrence.  Move onto the
     // next one.
@@ -2051,12 +2129,13 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
         ++MaxShiftAmt;
       IntegerType *ExtTy =
         IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
-      // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
         if (const SCEVConstant *Step =
-              dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
-          if (!Step->getValue()->getValue()
-                .urem(RHSC->getValue()->getValue()) &&
+            dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
+          // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+          const APInt &StepInt = Step->getValue()->getValue();
+          const APInt &DivInt = RHSC->getValue()->getValue();
+          if (!StepInt.urem(DivInt) &&
               getZeroExtendExpr(AR, ExtTy) ==
               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
                             getZeroExtendExpr(Step, ExtTy),
@@ -2067,6 +2146,22 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
             return getAddRecExpr(Operands, AR->getLoop(),
                                  SCEV::FlagNW);
           }
+          /// Get a canonical UDivExpr for a recurrence.
+          /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
+          // We can currently only fold X%N if X is constant.
+          const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
+          if (StartC && !DivInt.urem(StepInt) &&
+              getZeroExtendExpr(AR, ExtTy) ==
+              getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+                            getZeroExtendExpr(Step, ExtTy),
+                            AR->getLoop(), SCEV::FlagAnyWrap)) {
+            const APInt &StartInt = StartC->getValue()->getValue();
+            const APInt &StartRem = StartInt.urem(StepInt);
+            if (StartRem != 0)
+              LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
+                                  AR->getLoop(), SCEV::FlagNW);
+          }
+        }
       // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
       if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
         SmallVector<const SCEV *, 4> Operands;
@@ -3503,7 +3598,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
         AddOps.push_back(Op1);
     }
     AddOps.push_back(getSCEV(U->getOperand(0)));
-    return getAddExpr(AddOps);
+    SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+    OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V);
+    if (OBO->hasNoSignedWrap())
+      setFlags(Flags, SCEV::FlagNSW);
+    if (OBO->hasNoUnsignedWrap())
+      setFlags(Flags, SCEV::FlagNUW);
+    return getAddExpr(AddOps, Flags);
   }
   case Instruction::Mul: {
     // See the Add code above.
@@ -3813,6 +3914,70 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
 //                   Iteration Count Computation Code
 //
 
+/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. Will also return 0 if the maximum trip count is very large
+/// (>= 2^32)
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+                                                    BasicBlock *ExitBlock) {
+  const SCEVConstant *ExitCount =
+    dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock));
+  if (!ExitCount)
+    return 0;
+
+  ConstantInt *ExitConst = ExitCount->getValue();
+
+  // Guard against huge trip counts.
+  if (ExitConst->getValue().getActiveBits() > 32)
+    return 0;
+
+  // In case of integer overflow, this returns 0, which is correct.
+  return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be the
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant, use getSmallConstantTripCount for that case), Will also return 1
+/// if the trip count is very large (>= 2^32).
+unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+                                                       BasicBlock *ExitBlock) {
+  const SCEV *ExitCount = getExitCount(L, ExitBlock);
+  if (ExitCount == getCouldNotCompute())
+    return 1;
+
+  // Get the trip count from the BE count by adding 1.
+  const SCEV *TCMul = getAddExpr(ExitCount,
+                                 getConstant(ExitCount->getType(), 1));
+  // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
+  // to factor simple cases.
+  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
+    TCMul = Mul->getOperand(0);
+
+  const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
+  if (!MulC)
+    return 1;
+
+  ConstantInt *Result = MulC->getValue();
+
+  // Guard against huge trip counts.
+  if (!Result || Result->getValue().getActiveBits() > 32)
+    return 1;
+
+  return (unsigned)Result->getZExtValue();
+}
+
+// getExitCount - Get the expression for the number of loop iterations for which
+// this loop is guaranteed not to exit via ExitintBlock. Otherwise return
+// SCEVCouldNotCompute.
+const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
+  return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
+}
+
 /// getBackedgeTakenCount - If the specified loop has a predictable
 /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
 /// object. The backedge-taken count is the number of times the loop header
@@ -3825,14 +3990,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
 /// hasLoopInvariantBackedgeTakenCount).
 ///
 const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
-  return getBackedgeTakenInfo(L).Exact;
+  return getBackedgeTakenInfo(L).getExact(this);
 }
 
 /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
 /// return the least SCEV value that is known never to be less than the
 /// actual backedge taken count.
 const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
-  return getBackedgeTakenInfo(L).Max;
+  return getBackedgeTakenInfo(L).getMax(this);
 }
 
 /// PushLoopPHIs - Push PHI nodes in the header of the given loop
@@ -3849,33 +4014,31 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
 
 const ScalarEvolution::BackedgeTakenInfo &
 ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
-  // Initially insert a CouldNotCompute for this loop. If the insertion
+  // Initially insert an invalid entry for this loop. If the insertion
   // succeeds, proceed to actually compute a backedge-taken count and
   // update the value. The temporary CouldNotCompute value tells SCEV
   // code elsewhere that it shouldn't attempt to request a new
   // backedge-taken count, which could result in infinite recursion.
   std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
-    BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+    BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
   if (!Pair.second)
     return Pair.first->second;
 
-  BackedgeTakenInfo Result = getCouldNotCompute();
-  BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L);
-  if (Computed.Exact != getCouldNotCompute()) {
-    assert(isLoopInvariant(Computed.Exact, L) &&
-           isLoopInvariant(Computed.Max, L) &&
+  // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
+  // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
+  // must be cleared in this scope.
+  BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
+
+  if (Result.getExact(this) != getCouldNotCompute()) {
+    assert(isLoopInvariant(Result.getExact(this), L) &&
+           isLoopInvariant(Result.getMax(this), L) &&
            "Computed backedge-taken count isn't loop invariant for loop!");
     ++NumTripCountsComputed;
-
-    // Update the value in the map.
-    Result = Computed;
-  } else {
-    if (Computed.Max != getCouldNotCompute())
-      // Update the value in the map.
-      Result = Computed;
-    if (isa<PHINode>(L->getHeader()->begin()))
-      // Only count loops that have phi nodes as not being computable.
-      ++NumTripCountsNotComputed;
+  }
+  else if (Result.getMax(this) == getCouldNotCompute() &&
+           isa<PHINode>(L->getHeader()->begin())) {
+    // Only count loops that have phi nodes as not being computable.
+    ++NumTripCountsNotComputed;
   }
 
   // Now that we know more about the trip count for this loop, forget any
@@ -3883,7 +4046,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
   // conservative estimates made without the benefit of trip count
   // information. This is similar to the code in forgetLoop, except that
   // it handles SCEVUnknown PHI nodes specially.
-  if (Computed.hasAnyInfo()) {
+  if (Result.hasAnyInfo()) {
     SmallVector<Instruction *, 16> Worklist;
     PushLoopPHIs(L, Worklist);
 
@@ -3928,7 +4091,12 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
 /// compute a trip count, or if the loop is deleted.
 void ScalarEvolution::forgetLoop(const Loop *L) {
   // Drop any stored trip count value.
-  BackedgeTakenCounts.erase(L);
+  DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
+    BackedgeTakenCounts.find(L);
+  if (BTCPos != BackedgeTakenCounts.end()) {
+    BTCPos->second.clear();
+    BackedgeTakenCounts.erase(BTCPos);
+  }
 
   // Drop information about expressions based on loop-header PHIs.
   SmallVector<Instruction *, 16> Worklist;
@@ -3984,6 +4152,85 @@ void ScalarEvolution::forgetValue(Value *V) {
   }
 }
 
+/// getExact - Get the exact loop backedge taken count considering all loop
+/// exits. If all exits are computable, this is the minimum computed count.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
+  // If any exits were not computable, the loop is not computable.
+  if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
+
+  // We need at least one computable exit.
+  if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
+  assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
+
+  const SCEV *BECount = 0;
+  for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+       ENT != 0; ENT = ENT->getNextExit()) {
+
+    assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
+
+    if (!BECount)
+      BECount = ENT->ExactNotTaken;
+    else
+      BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken);
+  }
+  assert(BECount && "Invalid not taken count for loop exit");
+  return BECount;
+}
+
+/// getExact - Get the exact not taken count for this loop exit.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
+                                             ScalarEvolution *SE) const {
+  for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+       ENT != 0; ENT = ENT->getNextExit()) {
+
+    if (ENT->ExitingBlock == ExitingBlock)
+      return ENT->ExactNotTaken;
+  }
+  return SE->getCouldNotCompute();
+}
+
+/// getMax - Get the max backedge taken count for the loop.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+  return Max ? Max : SE->getCouldNotCompute();
+}
+
+/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
+/// computable exit into a persistent ExitNotTakenInfo array.
+ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
+  SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
+  bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
+
+  if (!Complete)
+    ExitNotTaken.setIncomplete();
+
+  unsigned NumExits = ExitCounts.size();
+  if (NumExits == 0) return;
+
+  ExitNotTaken.ExitingBlock = ExitCounts[0].first;
+  ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
+  if (NumExits == 1) return;
+
+  // Handle the rare case of multiple computable exits.
+  ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
+
+  ExitNotTakenInfo *PrevENT = &ExitNotTaken;
+  for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
+    PrevENT->setNextExit(ENT);
+    ENT->ExitingBlock = ExitCounts[i].first;
+    ENT->ExactNotTaken = ExitCounts[i].second;
+  }
+}
+
+/// clear - Invalidate this result and free the ExitNotTakenInfo array.
+void ScalarEvolution::BackedgeTakenInfo::clear() {
+  ExitNotTaken.ExitingBlock = 0;
+  ExitNotTaken.ExactNotTaken = 0;
+  delete[] ExitNotTaken.getNextExit();
+}
+
 /// ComputeBackedgeTakenCount - Compute the number of times the backedge
 /// of the specified loop will execute.
 ScalarEvolution::BackedgeTakenInfo
@@ -3992,38 +4239,31 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
   L->getExitingBlocks(ExitingBlocks);
 
   // Examine all exits and pick the most conservative values.
-  const SCEV *BECount = getCouldNotCompute();
   const SCEV *MaxBECount = getCouldNotCompute();
-  bool CouldNotComputeBECount = false;
+  bool CouldComputeBECount = true;
+  SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
-    BackedgeTakenInfo NewBTI =
-      ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
-
-    if (NewBTI.Exact == getCouldNotCompute()) {
+    ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
+    if (EL.Exact == getCouldNotCompute())
       // We couldn't compute an exact value for this exit, so
       // we won't be able to compute an exact value for the loop.
-      CouldNotComputeBECount = true;
-      BECount = getCouldNotCompute();
-    } else if (!CouldNotComputeBECount) {
-      if (BECount == getCouldNotCompute())
-        BECount = NewBTI.Exact;
-      else
-        BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
-    }
+      CouldComputeBECount = false;
+    else
+      ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact));
+
     if (MaxBECount == getCouldNotCompute())
-      MaxBECount = NewBTI.Max;
-    else if (NewBTI.Max != getCouldNotCompute())
-      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
+      MaxBECount = EL.Max;
+    else if (EL.Max != getCouldNotCompute())
+      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max);
   }
 
-  return BackedgeTakenInfo(BECount, MaxBECount);
+  return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
 }
 
-/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
-/// of the specified loop will execute if it exits via the specified block.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
-                                                   BasicBlock *ExitingBlock) {
+/// ComputeExitLimit - Compute the number of times the backedge of the specified
+/// loop will execute if it exits via the specified block.
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
 
   // Okay, we've chosen an exiting block.  See what condition causes us to
   // exit at this block.
@@ -4081,95 +4321,91 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
   }
 
   // Proceed to the next level to examine the exit condition expression.
-  return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
-                                               ExitBr->getSuccessor(0),
-                                               ExitBr->getSuccessor(1));
+  return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
+                                  ExitBr->getSuccessor(0),
+                                  ExitBr->getSuccessor(1));
 }
 
-/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// ComputeExitLimitFromCond - Compute the number of times the
 /// backedge of the specified loop will execute if its exit condition
 /// were a conditional branch of ExitCond, TBB, and FBB.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
-                                                       Value *ExitCond,
-                                                       BasicBlock *TBB,
-                                                       BasicBlock *FBB) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
+                                          Value *ExitCond,
+                                          BasicBlock *TBB,
+                                          BasicBlock *FBB) {
   // Check if the controlling expression for this loop is an And or Or.
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
     if (BO->getOpcode() == Instruction::And) {
       // Recurse on the operands of the and.
-      BackedgeTakenInfo BTI0 =
-        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
-      BackedgeTakenInfo BTI1 =
-        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
+      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
       if (L->contains(TBB)) {
         // Both conditions must be true for the loop to continue executing.
         // Choose the less conservative count.
-        if (BTI0.Exact == getCouldNotCompute() ||
-            BTI1.Exact == getCouldNotCompute())
+        if (EL0.Exact == getCouldNotCompute() ||
+            EL1.Exact == getCouldNotCompute())
           BECount = getCouldNotCompute();
         else
-          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
-        if (BTI0.Max == getCouldNotCompute())
-          MaxBECount = BTI1.Max;
-        else if (BTI1.Max == getCouldNotCompute())
-          MaxBECount = BTI0.Max;
+          BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+        if (EL0.Max == getCouldNotCompute())
+          MaxBECount = EL1.Max;
+        else if (EL1.Max == getCouldNotCompute())
+          MaxBECount = EL0.Max;
         else
-          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+          MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
       } else {
         // Both conditions must be true at the same time for the loop to exit.
         // For now, be conservative.
         assert(L->contains(FBB) && "Loop block has no successor in loop!");
-        if (BTI0.Max == BTI1.Max)
-          MaxBECount = BTI0.Max;
-        if (BTI0.Exact == BTI1.Exact)
-          BECount = BTI0.Exact;
+        if (EL0.Max == EL1.Max)
+          MaxBECount = EL0.Max;
+        if (EL0.Exact == EL1.Exact)
+          BECount = EL0.Exact;
       }
 
-      return BackedgeTakenInfo(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount);
     }
     if (BO->getOpcode() == Instruction::Or) {
       // Recurse on the operands of the or.
-      BackedgeTakenInfo BTI0 =
-        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
-      BackedgeTakenInfo BTI1 =
-        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
+      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
       if (L->contains(FBB)) {
         // Both conditions must be false for the loop to continue executing.
         // Choose the less conservative count.
-        if (BTI0.Exact == getCouldNotCompute() ||
-            BTI1.Exact == getCouldNotCompute())
+        if (EL0.Exact == getCouldNotCompute() ||
+            EL1.Exact == getCouldNotCompute())
           BECount = getCouldNotCompute();
         else
-          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
-        if (BTI0.Max == getCouldNotCompute())
-          MaxBECount = BTI1.Max;
-        else if (BTI1.Max == getCouldNotCompute())
-          MaxBECount = BTI0.Max;
+          BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+        if (EL0.Max == getCouldNotCompute())
+          MaxBECount = EL1.Max;
+        else if (EL1.Max == getCouldNotCompute())
+          MaxBECount = EL0.Max;
         else
-          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+          MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
       } else {
         // Both conditions must be false at the same time for the loop to exit.
         // For now, be conservative.
         assert(L->contains(TBB) && "Loop block has no successor in loop!");
-        if (BTI0.Max == BTI1.Max)
-          MaxBECount = BTI0.Max;
-        if (BTI0.Exact == BTI1.Exact)
-          BECount = BTI0.Exact;
+        if (EL0.Max == EL1.Max)
+          MaxBECount = EL0.Max;
+        if (EL0.Exact == EL1.Exact)
+          BECount = EL0.Exact;
       }
 
-      return BackedgeTakenInfo(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount);
     }
   }
 
   // With an icmp, it may be feasible to compute an exact backedge-taken count.
   // Proceed to the next level to examine the icmp.
   if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
-    return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+    return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB);
 
   // Check for a constant condition. These are normally stripped out by
   // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -4185,17 +4421,17 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
   }
 
   // If it's not an integer or pointer comparison then compute it the hard way.
-  return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+  return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
 }
 
-/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
+/// ComputeExitLimitFromICmp - Compute the number of times the
 /// backedge of the specified loop will execute if its exit condition
 /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
-                                                           ICmpInst *ExitCond,
-                                                           BasicBlock *TBB,
-                                                           BasicBlock *FBB) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
+                                          ICmpInst *ExitCond,
+                                          BasicBlock *TBB,
+                                          BasicBlock *FBB) {
 
   // If the condition was exit on true, convert the condition to exit on false
   ICmpInst::Predicate Cond;
@@ -4207,8 +4443,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
   // Handle common loops like: for (X = "string"; *X; ++X)
   if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
     if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
-      BackedgeTakenInfo ItCnt =
-        ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+      ExitLimit ItCnt =
+        ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
       if (ItCnt.hasAnyInfo())
         return ItCnt;
     }
@@ -4247,36 +4483,36 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
   switch (Cond) {
   case ICmpInst::ICMP_NE: {                     // while (X != Y)
     // Convert to: while (X-Y != 0)
-    BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
-    if (BTI.hasAnyInfo()) return BTI;
+    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+    if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_EQ: {                     // while (X == Y)
     // Convert to: while (X-Y == 0)
-    BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
-    if (BTI.hasAnyInfo()) return BTI;
+    ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+    if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_SLT: {
-    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
-    if (BTI.hasAnyInfo()) return BTI;
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, true);
+    if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_SGT: {
-    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+    ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
                                              getNotSCEV(RHS), L, true);
-    if (BTI.hasAnyInfo()) return BTI;
+    if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_ULT: {
-    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
-    if (BTI.hasAnyInfo()) return BTI;
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, false);
+    if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_UGT: {
-    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+    ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
                                              getNotSCEV(RHS), L, false);
-    if (BTI.hasAnyInfo()) return BTI;
+    if (EL.hasAnyInfo()) return EL;
     break;
   }
   default:
@@ -4290,8 +4526,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
 #endif
     break;
   }
-  return
-    ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+  return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
 }
 
 static ConstantInt *
@@ -4338,15 +4573,16 @@ GetAddressedElementFromGlobal(GlobalVariable *GV,
   return Init;
 }
 
-/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
+/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
 /// 'icmp op load X, cst', try to see if we can compute the backedge
 /// execution count.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
-                                                LoadInst *LI,
-                                                Constant *RHS,
-                                                const Loop *L,
-                                                ICmpInst::Predicate predicate) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeLoadConstantCompareExitLimit(
+  LoadInst *LI,
+  Constant *RHS,
+  const Loop *L,
+  ICmpInst::Predicate predicate) {
+
   if (LI->isVolatile()) return getCouldNotCompute();
 
   // Check to see if the loaded pointer is a getelementptr of a global.
@@ -4431,62 +4667,111 @@ static bool CanConstantFold(const Instruction *I) {
   return false;
 }
 
-/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
-/// in the loop that V is derived from.  We allow arbitrary operations along the
-/// way, but the operands of an operation must either be constants or a value
-/// derived from a constant PHI.  If this expression does not fit with these
-/// constraints, return null.
-static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
-  // If this is not an instruction, or if this is an instruction outside of the
-  // loop, it can't be derived from a loop PHI.
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0 || !L->contains(I)) return 0;
+/// Determine whether this instruction can constant evolve within this loop
+/// assuming its operands can all constant evolve.
+static bool canConstantEvolve(Instruction *I, const Loop *L) {
+  // An instruction outside of the loop can't be derived from a loop PHI.
+  if (!L->contains(I)) return false;
 
-  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+  if (isa<PHINode>(I)) {
     if (L->getHeader() == I->getParent())
-      return PN;
+      return true;
     else
       // We don't currently keep track of the control flow needed to evaluate
       // PHIs, so we cannot handle PHIs inside of loops.
-      return 0;
+      return false;
   }
 
   // If we won't be able to constant fold this expression even if the operands
-  // are constants, return early.
-  if (!CanConstantFold(I)) return 0;
+  // are constants, bail early.
+  return CanConstantFold(I);
+}
+
+/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
+/// recursing through each instruction operand until reaching a loop header phi.
+static PHINode *
+getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
+                               DenseMap<Instruction *, PHINode *> &PHIMap) {
 
   // Otherwise, we can evaluate this instruction if all of its operands are
   // constant or derived from a PHI node themselves.
   PHINode *PHI = 0;
-  for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
-    if (!isa<Constant>(I->getOperand(Op))) {
-      PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
-      if (P == 0) return 0;  // Not evolving from PHI
-      if (PHI == 0)
-        PHI = P;
-      else if (PHI != P)
-        return 0;  // Evolving from multiple different PHIs.
+  for (Instruction::op_iterator OpI = UseInst->op_begin(),
+         OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
+
+    if (isa<Constant>(*OpI)) continue;
+
+    Instruction *OpInst = dyn_cast<Instruction>(*OpI);
+    if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+
+    PHINode *P = dyn_cast<PHINode>(OpInst);
+    if (!P)
+      // If this operand is already visited, reuse the prior result.
+      // We may have P != PHI if this is the deepest point at which the
+      // inconsistent paths meet.
+      P = PHIMap.lookup(OpInst);
+    if (!P) {
+      // Recurse and memoize the results, whether a phi is found or not.
+      // This recursive call invalidates pointers into PHIMap.
+      P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
+      PHIMap[OpInst] = P;
     }
-
+    if (P == 0) return 0;        // Not evolving from PHI
+    if (PHI && PHI != P) return 0;  // Evolving from multiple different PHIs.
+    PHI = P;
+  }
   // This is a expression evolving from a constant PHI!
   return PHI;
 }
 
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from.  We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI.  If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0 || !canConstantEvolve(I, L)) return 0;
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    return PN;
+  }
+
+  // Record non-constant instructions contained by the loop.
+  DenseMap<Instruction *, PHINode *> PHIMap;
+  return getConstantEvolvingPHIOperands(I, L, PHIMap);
+}
+
 /// EvaluateExpression - Given an expression that passes the
 /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
 /// in the loop has the value PHIVal.  If we can't fold this expression for some
 /// reason, return null.
-static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
+static Constant *EvaluateExpression(Value *V, const Loop *L,
+                                    DenseMap<Instruction *, Constant *> &Vals,
                                     const TargetData *TD) {
-  if (isa<PHINode>(V)) return PHIVal;
+  // Convenient constant check, but redundant for recursive calls.
   if (Constant *C = dyn_cast<Constant>(V)) return C;
+
   Instruction *I = cast<Instruction>(V);
+  if (Constant *C = Vals.lookup(I)) return C;
+
+  assert(!isa<PHINode>(I) && "loop header phis should be mapped to constant");
+  assert(canConstantEvolve(I, L) && "cannot evaluate expression in this loop");
+  (void)L;
 
   std::vector<Constant*> Operands(I->getNumOperands());
 
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
-    Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
-    if (Operands[i] == 0) return 0;
+    Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
+    if (!Operand) {
+      Operands[i] = dyn_cast<Constant>(I->getOperand(i));
+      if (!Operands[i]) return 0;
+      continue;
+    }
+    Constant *C = EvaluateExpression(Operand, L, Vals, TD);
+    Vals[Operand] = C;
+    if (!C) return 0;
+    Operands[i] = C;
   }
 
   if (const CmpInst *CI = dyn_cast<CmpInst>(I))
@@ -4513,6 +4798,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
 
   Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
 
+  // FIXME: Nick's fix for PR11034 will seed constants for multiple header phis.
+  DenseMap<Instruction *, Constant *> CurrentIterVals;
+
   // Since the loop is canonicalized, the PHI node must have two entries.  One
   // entry must be a constant (coming in from outside of the loop), and the
   // second must be derived from the same PHI.
@@ -4521,6 +4809,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
     dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
   if (StartCST == 0)
     return RetVal = 0;  // Must be a constant.
+  CurrentIterVals[PN] = StartCST;
 
   Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
   if (getConstantEvolvingPHI(BEValue, L) != PN &&
@@ -4533,29 +4822,31 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
 
   unsigned NumIterations = BEs.getZExtValue(); // must be in range
   unsigned IterationNum = 0;
-  for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+  for (; ; ++IterationNum) {
     if (IterationNum == NumIterations)
-      return RetVal = PHIVal;  // Got exit value!
+      return RetVal = CurrentIterVals[PN];  // Got exit value!
 
     // Compute the value of the PHI node for the next iteration.
-    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
-    if (NextPHI == PHIVal)
+    // EvaluateExpression adds non-phi values to the CurrentIterVals map.
+    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
+    if (NextPHI == CurrentIterVals[PN])
       return RetVal = NextPHI;  // Stopped evolving!
     if (NextPHI == 0)
       return 0;        // Couldn't evaluate!
-    PHIVal = NextPHI;
+    DenseMap<Instruction *, Constant *> NextIterVals;
+    NextIterVals[PN] = NextPHI;
+    CurrentIterVals.swap(NextIterVals);
   }
 }
 
-/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// ComputeExitCountExhaustively - If the loop is known to execute a
 /// constant number of times (the condition evolves only from constants),
 /// try to evaluate a few iterations of the loop until we get the exit
 /// condition gets a value of ExitWhen (true or false).  If we cannot
 /// evaluate the trip count of the loop, return getCouldNotCompute().
-const SCEV *
-ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
-                                                       Value *Cond,
-                                                       bool ExitWhen) {
+const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+                                                           Value *Cond,
+                                                           bool ExitWhen) {
   PHINode *PN = getConstantEvolvingPHI(Cond, L);
   if (PN == 0) return getCouldNotCompute();
 
@@ -4582,8 +4873,10 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
   unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
   for (Constant *PHIVal = StartCST;
        IterationNum != MaxIterations; ++IterationNum) {
+    DenseMap<Instruction *, Constant *> PHIValMap;
+    PHIValMap[PN] = PHIVal;
     ConstantInt *CondVal =
-      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, PHIValMap, TD));
 
     // Couldn't symbolically evaluate.
     if (!CondVal) return getCouldNotCompute();
@@ -4594,7 +4887,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
     }
 
     // Compute the value of the PHI node for the next iteration.
-    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+    Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD);
     if (NextPHI == 0 || NextPHI == PHIVal)
       return getCouldNotCompute();// Couldn't evaluate or not making progress...
     PHIVal = NextPHI;
@@ -4924,7 +5217,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
     // Compute the two solutions for the quadratic formula.
     // The divisions must be performed as signed divisions.
     APInt NegB(-B);
-    APInt TwoA( A << 1 );
+    APInt TwoA(A << 1);
     if (TwoA.isMinValue()) {
       const SCEV *CNC = SE.getCouldNotCompute();
       return std::make_pair(CNC, CNC);
@@ -4939,7 +5232,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
 
     return std::make_pair(SE.getConstant(Solution1),
                           SE.getConstant(Solution2));
-    } // end APIntOps namespace
+  } // end APIntOps namespace
 }
 
 /// HowFarToZero - Return the number of times a backedge comparing the specified
@@ -4949,7 +5242,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
 /// now expressed as a single expression, V = x-y. So the exit test is
 /// effectively V != 0.  We know and take advantage of the fact that this
 /// expression only being used in a comparison by zero context.
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
 ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
   // If the value is a constant
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
@@ -5033,8 +5326,19 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
   // Handle unitary steps, which cannot wraparound.
   // 1*N = -Start; -1*N = Start (mod 2^BW), so:
   //   N = Distance (as unsigned)
-  if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue())
-    return Distance;
+  if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
+    ConstantRange CR = getUnsignedRange(Start);
+    const SCEV *MaxBECount;
+    if (!CountDown && CR.getUnsignedMin().isMinValue())
+      // When counting up, the worst starting value is 1, not 0.
+      MaxBECount = CR.getUnsignedMax().isMinValue()
+        ? getConstant(APInt::getMinValue(CR.getBitWidth()))
+        : getConstant(APInt::getMaxValue(CR.getBitWidth()));
+    else
+      MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
+                                         : -CR.getUnsignedMin());
+    return ExitLimit(Distance, MaxBECount);
+  }
 
   // If the recurrence is known not to wraparound, unsigned divide computes the
   // back edge count. We know that the value will either become zero (and thus
@@ -5061,7 +5365,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
 /// HowFarToNonZero - Return the number of times a backedge checking the
 /// specified value for nonzero will execute.  If not computable, return
 /// CouldNotCompute
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
 ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
   // Loops that look like: while (X == 0) are very strange indeed.  We don't
   // handle them yet except for the trivial case.  This could be expanded in the
@@ -5774,7 +6078,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
 /// HowManyLessThans - Return the number of times a backedge containing the
 /// specified less-than comparison will execute.  If not computable, return
 /// CouldNotCompute.
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
 ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                   const Loop *L, bool isSigned) {
   // Only handle:  "ADDREC < LoopInvariant".
@@ -5881,7 +6185,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     if (isa<SCEVCouldNotCompute>(MaxBECount))
       MaxBECount = BECount;
 
-    return BackedgeTakenInfo(BECount, MaxBECount);
+    return ExitLimit(BECount, MaxBECount);
   }
 
   return getCouldNotCompute();
@@ -6089,6 +6393,15 @@ void ScalarEvolution::releaseMemory() {
   FirstUnknown = 0;
 
   ValueExprMap.clear();
+
+  // Free any extra memory created for ExitNotTakenInfo in the unlikely event
+  // that a loop had multiple computable exits.
+  for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+         BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
+       I != E; ++I) {
+    I->second.clear();
+  }
+
   BackedgeTakenCounts.clear();
   ConstantEvolutionLoopExitValue.clear();
   ValuesAtScopes.clear();
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 1904bdc586..47f0f32116 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/STLExtras.h"
 
@@ -103,7 +104,8 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
     while ((isa<BitCastInst>(IP) &&
             isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
             cast<BitCastInst>(IP)->getOperand(0) != A) ||
-           isa<DbgInfoIntrinsic>(IP))
+           isa<DbgInfoIntrinsic>(IP) ||
+           isa<LandingPadInst>(IP))
       ++IP;
     return ReuseOrCreateCast(A, Ty, Op, IP);
   }
@@ -113,7 +115,9 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
   BasicBlock::iterator IP = I; ++IP;
   if (InvokeInst *II = dyn_cast<InvokeInst>(I))
     IP = II->getNormalDest()->begin();
-  while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP;
+  while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP) ||
+         isa<LandingPadInst>(IP))
+    ++IP;
   return ReuseOrCreateCast(I, Ty, Op, IP);
 }
 
@@ -160,7 +164,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
   }
 
   // If we haven't found this binop, insert it.
-  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"));
+  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
   BO->setDebugLoc(SaveInsertPt->getDebugLoc());
   rememberInstruction(BO);
 
@@ -494,7 +498,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     // Fold a GEP with constant operands.
     if (Constant *CLHS = dyn_cast<Constant>(V))
       if (Constant *CRHS = dyn_cast<Constant>(Idx))
-        return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1);
+        return ConstantExpr::getGetElementPtr(CLHS, CRHS);
 
     // Do a quick scan to see if we have this GEP nearby.  If so, reuse it.
     unsigned ScanLimit = 6;
@@ -572,8 +576,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   if (V->getType() != PTy)
     Casted = InsertNoopCastOfTo(Casted, PTy);
   Value *GEP = Builder.CreateGEP(Casted,
-                                 GepIndices.begin(),
-                                 GepIndices.end(),
+                                 GepIndices,
                                  "scevgep");
   Ops.push_back(SE.getUnknown(GEP));
   rememberInstruction(GEP);
@@ -841,6 +844,91 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
   }
 }
 
+/// Determine if this is a well-behaved chain of instructions leading back to
+/// the PHI. If so, it may be reused by expanded expressions.
+bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
+                                         const Loop *L) {
+  if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
+      (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
+    return false;
+  // If any of the operands don't dominate the insert position, bail.
+  // Addrec operands are always loop-invariant, so this can only happen
+  // if there are instructions which haven't been hoisted.
+  if (L == IVIncInsertLoop) {
+    for (User::op_iterator OI = IncV->op_begin()+1,
+           OE = IncV->op_end(); OI != OE; ++OI)
+      if (Instruction *OInst = dyn_cast<Instruction>(OI))
+        if (!SE.DT->dominates(OInst, IVIncInsertPos))
+          return false;
+  }
+  // Advance to the next instruction.
+  IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+  if (!IncV)
+    return false;
+
+  if (IncV->mayHaveSideEffects())
+    return false;
+
+  if (IncV != PN)
+    return true;
+
+  return isNormalAddRecExprPHI(PN, IncV, L);
+}
+
+/// Determine if this cyclic phi is in a form that would have been generated by
+/// LSR. We don't care if the phi was actually expanded in this pass, as long
+/// as it is in a low-cost form, for example, no implied multiplication. This
+/// should match any patterns generated by getAddRecExprPHILiterally and
+/// expandAddtoGEP.
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
+                                           const Loop *L) {
+  switch (IncV->getOpcode()) {
+  // Check for a simple Add/Sub or GEP of a loop invariant step.
+  case Instruction::Add:
+  case Instruction::Sub:
+    return IncV->getOperand(0) == PN
+      && L->isLoopInvariant(IncV->getOperand(1));
+  case Instruction::BitCast:
+    IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0));
+    if (!IncV)
+      return false;
+    // fall-thru to GEP handling
+  case Instruction::GetElementPtr: {
+    // This must be a pointer addition of constants (pretty) or some number of
+    // address-size elements (ugly).
+    for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
+         I != E; ++I) {
+      if (isa<Constant>(*I))
+        continue;
+      // ugly geps have 2 operands.
+      // i1* is used by the expander to represent an address-size element.
+      if (IncV->getNumOperands() != 2)
+        return false;
+      unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
+      if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
+          && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
+        return false;
+      // Ensure the operands dominate the insertion point. I don't know of a
+      // case when this would not be true, so this is somewhat untested.
+      if (L == IVIncInsertLoop) {
+        for (User::op_iterator OI = IncV->op_begin()+1,
+               OE = IncV->op_end(); OI != OE; ++OI)
+          if (Instruction *OInst = dyn_cast<Instruction>(OI))
+            if (!SE.DT->dominates(OInst, IVIncInsertPos))
+              return false;
+      }
+      break;
+    }
+    IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+    if (IncV && IncV->getOpcode() == Instruction::BitCast)
+      IncV = dyn_cast<Instruction>(IncV->getOperand(0));
+    return IncV == PN;
+  }
+  default:
+    return false;
+  }
+}
+
 /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
 /// the base addrec, which is the addrec without any non-loop-dominating
 /// values, and return the PHI.
@@ -852,70 +940,45 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
 
   // Reuse a previously-inserted PHI, if present.
-  for (BasicBlock::iterator I = L->getHeader()->begin();
-       PHINode *PN = dyn_cast<PHINode>(I); ++I)
-    if (SE.isSCEVable(PN->getType()) &&
-        (SE.getEffectiveSCEVType(PN->getType()) ==
-         SE.getEffectiveSCEVType(Normalized->getType())) &&
-        SE.getSCEV(PN) == Normalized)
-      if (BasicBlock *LatchBlock = L->getLoopLatch()) {
-        Instruction *IncV =
-          cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
-
-        // Determine if this is a well-behaved chain of instructions leading
-        // back to the PHI. It probably will be, if we're scanning an inner
-        // loop already visited by LSR for example, but it wouldn't have
-        // to be.
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  if (LatchBlock) {
+    for (BasicBlock::iterator I = L->getHeader()->begin();
+         PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      if (!SE.isSCEVable(PN->getType()) ||
+          (SE.getEffectiveSCEVType(PN->getType()) !=
+           SE.getEffectiveSCEVType(Normalized->getType())) ||
+          SE.getSCEV(PN) != Normalized)
+        continue;
+
+      Instruction *IncV =
+        cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+      if (LSRMode) {
+        if (!isExpandedAddRecExprPHI(PN, IncV, L))
+          continue;
+      }
+      else {
+        if (!isNormalAddRecExprPHI(PN, IncV, L))
+          continue;
+      }
+      // Ok, the add recurrence looks usable.
+      // Remember this PHI, even in post-inc mode.
+      InsertedValues.insert(PN);
+      // Remember the increment.
+      rememberInstruction(IncV);
+      if (L == IVIncInsertLoop)
         do {
-          if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
-              (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) {
-            IncV = 0;
+          if (SE.DT->dominates(IncV, IVIncInsertPos))
             break;
-          }
-          // If any of the operands don't dominate the insert position, bail.
-          // Addrec operands are always loop-invariant, so this can only happen
-          // if there are instructions which haven't been hoisted.
-          if (L == IVIncInsertLoop) {
-            for (User::op_iterator OI = IncV->op_begin()+1,
-                   OE = IncV->op_end(); OI != OE; ++OI)
-              if (Instruction *OInst = dyn_cast<Instruction>(OI))
-                if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
-                  IncV = 0;
-                  break;
-                }
-          }
-          if (!IncV)
-            break;
-          // Advance to the next instruction.
-          IncV = dyn_cast<Instruction>(IncV->getOperand(0));
-          if (!IncV)
-            break;
-          if (IncV->mayHaveSideEffects()) {
-            IncV = 0;
-            break;
-          }
+          // Make sure the increment is where we want it. But don't move it
+          // down past a potential existing post-inc user.
+          IncV->moveBefore(IVIncInsertPos);
+          IVIncInsertPos = IncV;
+          IncV = cast<Instruction>(IncV->getOperand(0));
         } while (IncV != PN);
-
-        if (IncV) {
-          // Ok, the add recurrence looks usable.
-          // Remember this PHI, even in post-inc mode.
-          InsertedValues.insert(PN);
-          // Remember the increment.
-          IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
-          rememberInstruction(IncV);
-          if (L == IVIncInsertLoop)
-            do {
-              if (SE.DT->dominates(IncV, IVIncInsertPos))
-                break;
-              // Make sure the increment is where we want it. But don't move it
-              // down past a potential existing post-inc user.
-              IncV->moveBefore(IVIncInsertPos);
-              IVIncInsertPos = IncV;
-              IncV = cast<Instruction>(IncV->getOperand(0));
-            } while (IncV != PN);
-          return PN;
-        }
-      }
+      return PN;
+    }
+  }
 
   // Save the original insertion point so we can restore it when we're done.
   BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
@@ -978,7 +1041,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
       const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
       IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
       if (IncV->getType() != PN->getType()) {
-        IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp");
+        IncV = Builder.CreateBitCast(IncV, PN->getType());
         rememberInstruction(IncV);
       }
     } else {
@@ -1057,6 +1120,14 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
     BasicBlock *LatchBlock = L->getLoopLatch();
     assert(LatchBlock && "PostInc mode requires a unique loop latch!");
     Result = PN->getIncomingValueForBlock(LatchBlock);
+
+    // For an expansion to use the postinc form, the client must call
+    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
+    // or dominated by IVIncInsertPos.
+    assert((!isa<Instruction>(Result) ||
+            SE.DT->dominates(cast<Instruction>(Result),
+                             Builder.GetInsertPoint())) &&
+           "postinc expansion does not dominate use");
   }
 
   // Re-apply any non-loop-dominating scale.
@@ -1110,7 +1181,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
     BasicBlock::iterator NewInsertPt =
       llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
-    while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt))
+    while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
+           isa<LandingPadInst>(NewInsertPt))
       ++NewInsertPt;
     V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
                       NewInsertPt);
@@ -1219,7 +1291,7 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
   Type *Ty = SE.getEffectiveSCEVType(S->getType());
   Value *V = expandCodeFor(S->getOperand(),
                            SE.getEffectiveSCEVType(S->getOperand()->getType()));
-  Value *I = Builder.CreateTrunc(V, Ty, "tmp");
+  Value *I = Builder.CreateTrunc(V, Ty);
   rememberInstruction(I);
   return I;
 }
@@ -1228,7 +1300,7 @@ Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
   Type *Ty = SE.getEffectiveSCEVType(S->getType());
   Value *V = expandCodeFor(S->getOperand(),
                            SE.getEffectiveSCEVType(S->getOperand()->getType()));
-  Value *I = Builder.CreateZExt(V, Ty, "tmp");
+  Value *I = Builder.CreateZExt(V, Ty);
   rememberInstruction(I);
   return I;
 }
@@ -1237,7 +1309,7 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
   Type *Ty = SE.getEffectiveSCEVType(S->getType());
   Value *V = expandCodeFor(S->getOperand(),
                            SE.getEffectiveSCEVType(S->getOperand()->getType()));
-  Value *I = Builder.CreateSExt(V, Ty, "tmp");
+  Value *I = Builder.CreateSExt(V, Ty);
   rememberInstruction(I);
   return I;
 }
@@ -1253,7 +1325,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
       LHS = InsertNoopCastOfTo(LHS, Ty);
     }
     Value *RHS = expandCodeFor(S->getOperand(i), Ty);
-    Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp");
+    Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
     rememberInstruction(ICmp);
     Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
     rememberInstruction(Sel);
@@ -1277,7 +1349,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
       LHS = InsertNoopCastOfTo(LHS, Ty);
     }
     Value *RHS = expandCodeFor(S->getOperand(i), Ty);
-    Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp");
+    Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
     rememberInstruction(ICmp);
     Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
     rememberInstruction(Sel);
@@ -1325,7 +1397,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
       // after the PHIs (and after any other instructions that we've inserted
       // there) so that it is guaranteed to dominate any user inside the loop.
       if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
-        InsertPt = L->getHeader()->getFirstNonPHI();
+        InsertPt = L->getHeader()->getFirstInsertionPt();
       while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
         InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
       break;
@@ -1346,8 +1418,12 @@ Value *SCEVExpander::expand(const SCEV *S) {
   Value *V = visit(S);
 
   // Remember the expanded value for this SCEV at this location.
-  if (PostIncLoops.empty())
-    InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+  //
+  // This is independent of PostIncLoops. The mapped value simply materializes
+  // the expression at this insertion point. If the mapped value happened to be
+  // a postinc expansion, it could be reused by a non postinc user, but only if
+  // its insertion point was already at the head of the loop.
+  InsertedExpressions[std::make_pair(S, InsertPt)] = V;
 
   restoreInsertPoint(SaveInsertBB, SaveInsertPt);
   return V;
@@ -1401,3 +1477,102 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
 
   return V;
 }
+
+/// hoistStep - Attempt to hoist an IV increment above a potential use.
+///
+/// To successfully hoist, two criteria must be met:
+/// - IncV operands dominate InsertPos and
+/// - InsertPos dominates IncV
+///
+/// Meeting the second condition means that we don't need to check all of IncV's
+/// existing uses (it's moving up in the domtree).
+///
+/// This does not yet recursively hoist the operands, although that would
+/// not be difficult.
+///
+/// This does not require a SCEVExpander instance and could be replaced by a
+/// general code-insertion helper.
+bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos,
+                             const DominatorTree *DT) {
+  if (DT->dominates(IncV, InsertPos))
+    return true;
+
+  if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
+    return false;
+
+  if (IncV->mayHaveSideEffects())
+    return false;
+
+  // Attempt to hoist IncV
+  for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
+       OI != OE; ++OI) {
+    Instruction *OInst = dyn_cast<Instruction>(OI);
+    if (OInst && !DT->dominates(OInst, InsertPos))
+      return false;
+  }
+  IncV->moveBefore(InsertPos);
+  return true;
+}
+
+/// replaceCongruentIVs - Check for congruent phis in this loop header and
+/// replace them with their most canonical representative. Return the number of
+/// phis eliminated.
+///
+/// This does not depend on any SCEVExpander state but should be used in
+/// the same context that SCEVExpander is used.
+unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
+                                           SmallVectorImpl<WeakVH> &DeadInsts) {
+  unsigned NumElim = 0;
+  DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+    PHINode *Phi = cast<PHINode>(I);
+    if (!SE.isSCEVable(Phi->getType()))
+      continue;
+
+    PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
+    if (!OrigPhiRef) {
+      OrigPhiRef = Phi;
+      continue;
+    }
+
+    // If one phi derives from the other via GEPs, types may differ.
+    // We could consider adding a bitcast here to handle it.
+    if (OrigPhiRef->getType() != Phi->getType())
+      continue;
+
+    if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+      Instruction *OrigInc =
+        cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock));
+      Instruction *IsomorphicInc =
+        cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+
+      // If this phi is more canonical, swap it with the original.
+      if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)
+          && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) {
+        std::swap(OrigPhiRef, Phi);
+        std::swap(OrigInc, IsomorphicInc);
+      }
+      // Replacing the congruent phi is sufficient because acyclic redundancy
+      // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+      // that a phi is congruent, it's often the head of an IV user cycle that
+      // is isomorphic with the original phi. So it's worth eagerly cleaning up
+      // the common case of a single IV increment.
+      if (OrigInc != IsomorphicInc &&
+          OrigInc->getType() == IsomorphicInc->getType() &&
+          SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) &&
+          hoistStep(OrigInc, IsomorphicInc, DT)) {
+        DEBUG_WITH_TYPE(DebugType, dbgs()
+                        << "INDVARS: Eliminated congruent iv.inc: "
+                        << *IsomorphicInc << '\n');
+        IsomorphicInc->replaceAllUsesWith(OrigInc);
+        DeadInsts.push_back(IsomorphicInc);
+      }
+    }
+    DEBUG_WITH_TYPE(DebugType, dbgs()
+                    << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+    ++NumElim;
+    Phi->replaceAllUsesWith(OrigPhiRef);
+    DeadInsts.push_back(Phi);
+  }
+  return NumElim;
+}
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 60e630aaab..c66ecd6e87 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -60,20 +60,40 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
   return true;
 }
 
-const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
-                                         const SCEV *S,
-                                         Instruction *User,
-                                         Value *OperandValToReplace,
-                                         PostIncLoopSet &Loops,
-                                         ScalarEvolution &SE,
-                                         DominatorTree &DT) {
-  if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
-    return S;
+namespace {
+
+/// Hold the state used during post-inc expression transformation, including a
+/// map of transformed expressions.
+class PostIncTransform {
+  TransformKind Kind;
+  PostIncLoopSet &Loops;
+  ScalarEvolution &SE;
+  DominatorTree &DT;
+
+  DenseMap<const SCEV*, const SCEV*> Transformed;
+
+public:
+  PostIncTransform(TransformKind kind, PostIncLoopSet &loops,
+                   ScalarEvolution &se, DominatorTree &dt):
+    Kind(kind), Loops(loops), SE(se), DT(dt) {}
+
+  const SCEV *TransformSubExpr(const SCEV *S, Instruction *User,
+                               Value *OperandValToReplace);
+
+protected:
+  const SCEV *TransformImpl(const SCEV *S, Instruction *User,
+                            Value *OperandValToReplace);
+};
+
+} // namespace
+
+/// Implement post-inc transformation for all valid expression types.
+const SCEV *PostIncTransform::
+TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
 
   if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
     const SCEV *O = X->getOperand();
-    const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
-                                           Loops, SE, DT);
+    const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
     if (O != N)
       switch (S->getSCEVType()) {
       case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
@@ -93,9 +113,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
     // Transform each operand.
     for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
          I != E; ++I) {
-      const SCEV *O = *I;
-      const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
-      Operands.push_back(N);
+      Operands.push_back(TransformSubExpr(*I, LUser, 0));
     }
     // Conservatively use AnyWrap until/unless we need FlagNW.
     const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
@@ -104,8 +122,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
     case NormalizeAutodetect:
       if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
         const SCEV *TransformedStep =
-          TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
-                                 User, OperandValToReplace, Loops, SE, DT);
+          TransformSubExpr(AR->getStepRecurrence(SE),
+                           User, OperandValToReplace);
         Result = SE.getMinusSCEV(Result, TransformedStep);
         Loops.insert(L);
       }
@@ -114,24 +132,20 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
       // sometimes fails to canonicalize two equal SCEVs to exactly the same
       // form. It's possibly a pessimization when this happens, but it isn't a
       // correctness problem, so disable this assert for now.
-      assert(S == TransformForPostIncUse(Denormalize, Result,
-                                         User, OperandValToReplace,
-                                         Loops, SE, DT) &&
+      assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
              "SCEV normalization is not invertible!");
 #endif
       break;
     case Normalize:
       if (Loops.count(L)) {
         const SCEV *TransformedStep =
-          TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
-                                 User, OperandValToReplace, Loops, SE, DT);
+          TransformSubExpr(AR->getStepRecurrence(SE),
+                           User, OperandValToReplace);
         Result = SE.getMinusSCEV(Result, TransformedStep);
       }
 #if 0
       // See the comment on the assert above.
-      assert(S == TransformForPostIncUse(Denormalize, Result,
-                                         User, OperandValToReplace,
-                                         Loops, SE, DT) &&
+      assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
              "SCEV normalization is not invertible!");
 #endif
       break;
@@ -150,8 +164,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
     for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
          I != E; ++I) {
       const SCEV *O = *I;
-      const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
-                                             Loops, SE, DT);
+      const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
       Changed |= N != O;
       Operands.push_back(N);
     }
@@ -170,10 +183,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
   if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
     const SCEV *LO = X->getLHS();
     const SCEV *RO = X->getRHS();
-    const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace,
-                                            Loops, SE, DT);
-    const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace,
-                                            Loops, SE, DT);
+    const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace);
+    const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace);
     if (LO != LN || RO != RN)
       return SE.getUDivExpr(LN, RN);
     return S;
@@ -182,3 +193,33 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
   llvm_unreachable("Unexpected SCEV kind!");
   return 0;
 }
+
+/// Manage recursive transformation across an expression DAG. Revisiting
+/// expressions would lead to exponential recursion.
+const SCEV *PostIncTransform::
+TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
+
+  if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+    return S;
+
+  const SCEV *Result = Transformed.lookup(S);
+  if (Result)
+    return Result;
+
+  Result = TransformImpl(S, User, OperandValToReplace);
+  Transformed[S] = Result;
+  return Result;
+}
+
+/// Top level driver for transforming an expression DAG into its requested
+/// post-inc form (either "Normalized" or "Denormalized".
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+                                         const SCEV *S,
+                                         Instruction *User,
+                                         Value *OperandValToReplace,
+                                         PostIncLoopSet &Loops,
+                                         ScalarEvolution &SE,
+                                         DominatorTree &DT) {
+  PostIncTransform Transform(Kind, Loops, SE, DT);
+  return Transform.TransformSubExpr(S, User, OperandValToReplace);
+}
author	Logan Chien <loganchien@google.com>	2011-10-20 00:08:13 +0800
committer	Logan Chien <loganchien@google.com>	2011-10-20 00:09:35 +0800
commit	0ebc07a576037e4e36f68bf5cece32740ca120c0 (patch)
tree	c2e40648043d01498ee25af839a071193561e425 /lib/Analysis
parent	62383e889e0b06fd12a6b88311717cd33a1925c4 (diff)
parent	cdd8e46bec4e975d00a5abea808d8eb4138515c5 (diff)
download	external_llvm-0ebc07a576037e4e36f68bf5cece32740ca120c0.tar.gz external_llvm-0ebc07a576037e4e36f68bf5cece32740ca120c0.tar.bz2 external_llvm-0ebc07a576037e4e36f68bf5cece32740ca120c0.zip