aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms
diff options
context:
space:
mode:
authorLogan Chien <loganchien@google.com>2011-10-20 00:08:13 +0800
committerLogan Chien <loganchien@google.com>2011-10-20 00:09:35 +0800
commit0ebc07a576037e4e36f68bf5cece32740ca120c0 (patch)
treec2e40648043d01498ee25af839a071193561e425 /lib/Transforms
parent62383e889e0b06fd12a6b88311717cd33a1925c4 (diff)
parentcdd8e46bec4e975d00a5abea808d8eb4138515c5 (diff)
downloadexternal_llvm-0ebc07a576037e4e36f68bf5cece32740ca120c0.tar.gz
external_llvm-0ebc07a576037e4e36f68bf5cece32740ca120c0.tar.bz2
external_llvm-0ebc07a576037e4e36f68bf5cece32740ca120c0.zip
Merge with LLVM upstream 2011/10/20 (r142530)
Conflicts: lib/Support/Unix/Host.inc Change-Id: Idc00db3b63912dca6348bddd9f8a1af2a8d5d147
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp17
-rw-r--r--lib/Transforms/IPO/CMakeLists.txt12
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp49
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp4
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp42
-rw-r--r--lib/Transforms/IPO/IPO.cpp15
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp15
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp26
-rw-r--r--lib/Transforms/IPO/LoopExtractor.cpp73
-rw-r--r--lib/Transforms/IPO/LowerSetJmp.cpp547
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp40
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp343
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp5
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp2
-rw-r--r--lib/Transforms/InstCombine/CMakeLists.txt8
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h5
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp47
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp121
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp34
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp461
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp71
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp11
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp10
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp47
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp12
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp593
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt7
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp356
-rw-r--r--lib/Transforms/Instrumentation/PathProfiling.cpp15
-rw-r--r--lib/Transforms/Instrumentation/ProfilingUtils.cpp7
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp2
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt11
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp100
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp180
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp8
-rw-r--r--lib/Transforms/Scalar/GVN.cpp247
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp226
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp617
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp4
-rw-r--r--lib/Transforms/Scalar/LICM.cpp64
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp6
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp141
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp61
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp28
-rw-r--r--lib/Transforms/Scalar/LowerAtomic.cpp173
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp37
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp454
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp2
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp228
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp5
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp430
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp45
-rw-r--r--lib/Transforms/Scalar/Sink.cpp13
-rw-r--r--lib/Transforms/Scalar/TailDuplication.cpp373
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp346
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp45
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt8
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp8
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp27
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp39
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp163
-rw-r--r--lib/Transforms/Utils/Local.cpp25
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp47
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp186
-rw-r--r--lib/Transforms/Utils/LowerExpectIntrinsic.cpp13
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp31
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp4
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp4
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp153
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp432
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp5
72 files changed, 4753 insertions, 3247 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index d92c45ff6a..e160f63ae3 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -382,7 +382,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
User *U = *UI;
Operands.clear();
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- if (LI->isVolatile()) return false; // Don't hack volatile loads
+ // Don't hack volatile/atomic loads
+ if (!LI->isSimple()) return false;
Loads.push_back(LI);
// Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0);
@@ -410,7 +411,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
UI != E; ++UI)
if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
- if (LI->isVolatile()) return false; // Don't hack volatile loads
+ // Don't hack volatile/atomic loads
+ if (!LI->isSimple()) return false;
Loads.push_back(LI);
} else {
// Other uses than load?
@@ -576,9 +578,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
// not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
- SI->begin(),
- SI->end()));
+ Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
assert(Params.back());
}
@@ -668,7 +668,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
+ Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
(*AI)->getName()+"."+utostr(i),
Call);
// TODO: Tell AA about the new values?
@@ -699,8 +699,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
}
// And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
- V->getName()+".idx", Call);
+ V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
Ops.clear();
AA.copyValue(OrigLoad->getOperand(0), V);
}
@@ -801,7 +800,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
Value *Idx =
- GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
+ GetElementPtrInst::Create(TheAlloca, Idxs,
TheAlloca->getName()+"."+Twine(i),
InsertPt);
I2->setName(I->getName()+"."+Twine(i));
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 3de7bfceed..4d8dbc2189 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -13,10 +13,20 @@ add_llvm_library(LLVMipo
Inliner.cpp
Internalize.cpp
LoopExtractor.cpp
- LowerSetJmp.cpp
MergeFunctions.cpp
PartialInlining.cpp
+ PassManagerBuilder.cpp
PruneEH.cpp
StripDeadPrototypes.cpp
StripSymbols.cpp
)
+
+add_llvm_library_dependencies(LLVMipo
+ LLVMAnalysis
+ LLVMCore
+ LLVMScalarOpts
+ LLVMSupport
+ LLVMTarget
+ LLVMTransformUtils
+ LLVMipa
+ )
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index a21efced73..c3ecb7afff 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -23,7 +23,9 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -37,10 +39,18 @@ namespace {
initializeConstantMergePass(*PassRegistry::getPassRegistry());
}
- // run - For this pass, process all of the globals in the module,
- // eliminating duplicate constants.
- //
+ // For this pass, process all of the globals in the module, eliminating
+ // duplicate constants.
bool runOnModule(Module &M);
+
+ // Return true iff we can determine the alignment of this global variable.
+ bool hasKnownAlignment(GlobalVariable *GV) const;
+
+ // Return the alignment of the global, including converting the default
+ // alignment to a concrete value.
+ unsigned getAlignment(GlobalVariable *GV) const;
+
+ const TargetData *TD;
};
}
@@ -77,15 +87,28 @@ static bool IsBetterCannonical(const GlobalVariable &A,
return A.hasUnnamedAddr();
}
+bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
+ return TD || GV->getAlignment() != 0;
+}
+
+unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+ if (TD)
+ return TD->getPreferredAlignment(GV);
+ return GV->getAlignment();
+}
+
bool ConstantMerge::runOnModule(Module &M) {
+ TD = getAnalysisIfAvailable<TargetData>();
+
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
- // Map unique constant/section pairs to globals. We don't want to merge
- // globals in different sections.
- DenseMap<Constant*, GlobalVariable*> CMap;
+ // Map unique <constants, has-unknown-alignment> pairs to globals. We don't
+ // want to merge globals of unknown alignment with those of explicit
+ // alignment. If we have TargetData, we always know the alignment.
+ DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
// Replacements - This vector contains a list of replacements to perform.
SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
@@ -120,7 +143,8 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- GlobalVariable *&Slot = CMap[Init];
+ PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
+ GlobalVariable *&Slot = CMap[Pair];
// If this is the first constant we find or if the old on is local,
// replace with the current one. It the current is externally visible
@@ -152,7 +176,8 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- GlobalVariable *Slot = CMap[Init];
+ PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
+ GlobalVariable *Slot = CMap[Pair];
if (!Slot || Slot == GV)
continue;
@@ -175,6 +200,14 @@ bool ConstantMerge::runOnModule(Module &M) {
// now. This avoid invalidating the pointers in CMap, which are unneeded
// now.
for (unsigned i = 0, e = Replacements.size(); i != e; ++i) {
+ // Bump the alignment if necessary.
+ if (Replacements[i].first->getAlignment() ||
+ Replacements[i].second->getAlignment()) {
+ Replacements[i].second->setAlignment(std::max(
+ Replacements[i].first->getAlignment(),
+ Replacements[i].second->getAlignment()));
+ }
+
// Eliminate any uses of the dead global.
Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 95decec0f8..0edf342750 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -163,14 +163,14 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
ReadsMemory = true;
continue;
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- // Ignore non-volatile loads from local memory.
+ // Ignore non-volatile loads from local memory. (Atomic is okay here.)
if (!LI->isVolatile()) {
AliasAnalysis::Location Loc = AA->getLocation(LI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Ignore non-volatile stores to local memory.
+ // Ignore non-volatile stores to local memory. (Atomic is okay here.)
if (!SI->isVolatile()) {
AliasAnalysis::Location Loc = AA->getLocation(SI);
if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index d817b6ed4a..3552d03919 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -195,12 +195,14 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
}
if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
GS.isLoaded = true;
- if (LI->isVolatile()) return true; // Don't hack on volatile loads.
+ // Don't hack on volatile/atomic loads.
+ if (!LI->isSimple()) return true;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Don't allow a store OF the address, only stores TO the address.
if (SI->getOperand(0) == V) return true;
- if (SI->isVolatile()) return true; // Don't hack on volatile stores.
+ // Don't hack on volatile/atomic stores.
+ if (!SI->isSimple()) return true;
// If this is a direct store to the global (i.e., the global is a scalar
// value, not an aggregate), keep more specific information about
@@ -596,15 +598,14 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
Idxs.push_back(NullInt);
for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
Idxs.push_back(CE->getOperand(i));
- NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr),
- &Idxs[0], Idxs.size());
+ NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
} else {
GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
SmallVector<Value*, 8> Idxs;
Idxs.push_back(NullInt);
for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
Idxs.push_back(GEPI->getOperand(i));
- NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
+ NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
GEPI->getName()+"."+Twine(Val),GEPI);
}
}
@@ -753,8 +754,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
- ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
- Idxs.size()));
+ ConstantExpr::getGetElementPtr(NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -1245,8 +1245,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
- Value *NGEPI = GetElementPtrInst::Create(NewPtr,
- GEPIdx.begin(), GEPIdx.end(),
+ Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NGEPI);
GEPI->eraseFromParent();
@@ -1260,11 +1259,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// already been seen first by another load, so its uses have already been
// processed.
PHINode *PN = cast<PHINode>(LoadUser);
- bool Inserted;
- DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
- tie(InsertPos, Inserted) =
- InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
- if (!Inserted) return;
+ if (!InsertedScalarizedValues.insert(std::make_pair(PN,
+ std::vector<Value*>())).second)
+ return;
// If this is the first time we've seen this PHI, recursively process all
// users.
@@ -1379,8 +1376,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
- Constant::getNullValue(GVVal->getType()),
- "tmp");
+ Constant::getNullValue(GVVal->getType()));
BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
OrigBB->getParent());
BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
@@ -2338,7 +2334,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
Constant *InstResult = 0;
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
- if (SI->isVolatile()) return false; // no volatile accesses.
+ if (!SI->isSimple()) return false; // no volatile/atomic accesses.
Constant *Ptr = getVal(Values, SI->getOperand(1));
if (!isSimpleEnoughPointerToCommit(Ptr))
// If this is too complex for us to commit, reject it.
@@ -2374,7 +2370,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
Constant * const IdxList[] = {IdxZero, IdxZero};
- Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2);
+ Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
// If we can't improve the situation by introspecting NewTy,
// we have to give up.
@@ -2411,11 +2407,11 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
i != e; ++i)
GEPOps.push_back(getVal(Values, *i));
- InstResult = cast<GEPOperator>(GEP)->isInBounds() ?
- ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) :
- ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
+ InstResult =
+ ConstantExpr::getGetElementPtr(P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
- if (LI->isVolatile()) return false; // no volatile accesses.
+ if (!LI->isSimple()) return false; // no volatile/atomic accesses.
InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
MutatedMemory);
if (InstResult == 0) return false; // Could not evaluate load.
@@ -2511,7 +2507,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
CallStack.pop_back(); // return from fn.
return true; // We succeeded at evaluating this ctor!
} else {
- // invoke, unwind, unreachable.
+ // invoke, unwind, resume, unreachable.
return false; // Cannot handle this terminator.
}
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 31ce95f53d..6233922db9 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/IPO.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
@@ -35,7 +36,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
- initializeLowerSetJmpPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
initializePruneEHPass(Registry);
@@ -70,6 +70,10 @@ void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createFunctionInliningPass());
}
+void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(llvm::createAlwaysInlinerPass());
+}
+
void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createGlobalDCEPass());
}
@@ -82,10 +86,6 @@ void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createIPConstantPropagationPass());
}
-void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLowerSetJmpPass());
-}
-
void LLVMAddPruneEHPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPruneEHPass());
}
@@ -98,11 +98,6 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
unwrap(PM)->add(createInternalizePass(AllButMain != 0));
}
-
-void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) {
- // FIXME: Remove in LLVM 3.0.
-}
-
void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createStripDeadPrototypesPass());
}
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index ce795b7243..c0426da2c6 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
@@ -32,10 +33,10 @@ namespace {
// AlwaysInliner only inlines functions that are mark as "always inline".
class AlwaysInliner : public Inliner {
// Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
+ SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
- // Use extremely low threshold.
+ // Use extremely low threshold.
AlwaysInliner() : Inliner(ID, -2000000000) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -52,8 +53,8 @@ namespace {
void growCachedCostInfo(Function* Caller, Function* Callee) {
CA.growCachedCostInfo(Caller, Callee);
}
- virtual bool doFinalization(CallGraph &CG) {
- return removeDeadFunctions(CG, &NeverInline);
+ virtual bool doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG, &NeverInline);
}
virtual bool doInitialization(CallGraph &CG);
void releaseMemory() {
@@ -71,11 +72,13 @@ INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
-// doInitialization - Initializes the vector of functions that have not
+// doInitialization - Initializes the vector of functions that have not
// been annotated with the "always inline" attribute.
bool AlwaysInliner::doInitialization(CallGraph &CG) {
+ CA.setTargetData(getAnalysisIfAvailable<TargetData>());
+
Module &M = CG.getModule();
-
+
for (Module::iterator I = M.begin(), E = M.end();
I != E; ++I)
if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 0c5b3be8f9..84dd4fdd98 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
@@ -30,7 +31,7 @@ namespace {
class SimpleInliner : public Inliner {
// Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
+ SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
SimpleInliner() : Inliner(ID) {
@@ -68,16 +69,17 @@ INITIALIZE_PASS_END(SimpleInliner, "inline",
Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
-Pass *llvm::createFunctionInliningPass(int Threshold) {
+Pass *llvm::createFunctionInliningPass(int Threshold) {
return new SimpleInliner(Threshold);
}
// doInitialization - Initializes the vector of functions that have been
// annotated with the noinline attribute.
bool SimpleInliner::doInitialization(CallGraph &CG) {
-
+ CA.setTargetData(getAnalysisIfAvailable<TargetData>());
+
Module &M = CG.getModule();
-
+
for (Module::iterator I = M.begin(), E = M.end();
I != E; ++I)
if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
@@ -85,34 +87,34 @@ bool SimpleInliner::doInitialization(CallGraph &CG) {
// Get llvm.noinline
GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
-
+
if (GV == 0)
return false;
// Don't crash on invalid code
if (!GV->hasDefinitiveInitializer())
return false;
-
+
const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-
+
if (InitList == 0)
return false;
// Iterate over each element and add to the NeverInline set
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
-
+
// Get Source
const Constant *Elt = InitList->getOperand(i);
-
+
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
- if (CE->getOpcode() == Instruction::BitCast)
+ if (CE->getOpcode() == Instruction::BitCast)
Elt = CE->getOperand(0);
-
+
// Insert into set of functions to never inline
if (const Function *F = dyn_cast<Function>(Elt))
NeverInline.insert(F);
}
-
+
return false;
}
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 848944dc93..4f96afe44c 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/FunctionUtils.h"
#include "llvm/ADT/Statistic.h"
#include <fstream>
@@ -53,12 +54,12 @@ namespace {
char LoopExtractor::ID = 0;
INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
- "Extract loops into new functions", false, false)
+ "Extract loops into new functions", false, false)
INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
- "Extract loops into new functions", false, false)
+ "Extract loops into new functions", false, false)
namespace {
/// SingleLoopExtractor - For bugpoint.
@@ -100,9 +101,9 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
L->getHeader()->getParent()->getEntryBlock().getTerminator();
if (!isa<BranchInst>(EntryTI) ||
!cast<BranchInst>(EntryTI)->isUnconditional() ||
- EntryTI->getSuccessor(0) != L->getHeader())
+ EntryTI->getSuccessor(0) != L->getHeader()) {
ShouldExtractLoop = true;
- else {
+ } else {
// Check to see if any exits from the loop are more than just return
// blocks.
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -113,6 +114,21 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
break;
}
}
+
+ if (ShouldExtractLoop) {
+ // We must omit landing pads. Landing pads must accompany the invoke
+ // instruction. But this would result in a loop in the extracted
+ // function. An infinite cycle occurs when it tries to extract that loop as
+ // well.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i]->isLandingPad()) {
+ ShouldExtractLoop = false;
+ break;
+ }
+ }
+
if (ShouldExtractLoop) {
if (NumLoops == 0) return Changed;
--NumLoops;
@@ -149,6 +165,7 @@ namespace {
/// BlocksToNotExtract list.
class BlockExtractorPass : public ModulePass {
void LoadFile(const char *Filename);
+ void SplitLandingPadPreds(Function *F);
std::vector<BasicBlock*> BlocksToNotExtract;
std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
@@ -171,8 +188,7 @@ INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
// createBlockExtractorPass - This pass extracts all blocks (except those
// specified in the argument list) from the functions in the module.
//
-ModulePass *llvm::createBlockExtractorPass()
-{
+ModulePass *llvm::createBlockExtractorPass() {
return new BlockExtractorPass();
}
@@ -194,6 +210,37 @@ void BlockExtractorPass::LoadFile(const char *Filename) {
}
}
+/// SplitLandingPadPreds - The landing pad needs to be extracted with the invoke
+/// instruction. The critical edge breaker will refuse to break critical edges
+/// to a landing pad. So do them here. After this method runs, all landing pads
+/// should have only one predecessor.
+void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ InvokeInst *II = dyn_cast<InvokeInst>(I);
+ if (!II) continue;
+ BasicBlock *Parent = II->getParent();
+ BasicBlock *LPad = II->getUnwindDest();
+
+ // Look through the landing pad's predecessors. If one of them ends in an
+ // 'invoke', then we want to split the landing pad.
+ bool Split = false;
+ for (pred_iterator
+ PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ++PI) {
+ BasicBlock *BB = *PI;
+ if (BB->isLandingPad() && BB != Parent &&
+ isa<InvokeInst>(Parent->getTerminator())) {
+ Split = true;
+ break;
+ }
+ }
+
+ if (!Split) continue;
+
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs);
+ }
+}
+
bool BlockExtractorPass::runOnModule(Module &M) {
std::set<BasicBlock*> TranslatedBlocksToNotExtract;
for (unsigned i = 0, e = BlocksToNotExtract.size(); i != e; ++i) {
@@ -236,13 +283,21 @@ bool BlockExtractorPass::runOnModule(Module &M) {
// Now that we know which blocks to not extract, figure out which ones we WANT
// to extract.
std::vector<BasicBlock*> BlocksToExtract;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ SplitLandingPadPreds(&*F);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
if (!TranslatedBlocksToNotExtract.count(BB))
BlocksToExtract.push_back(BB);
+ }
- for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i)
- ExtractBasicBlock(BlocksToExtract[i]);
+ for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) {
+ SmallVector<BasicBlock*, 2> BlocksToExtractVec;
+ BlocksToExtractVec.push_back(BlocksToExtract[i]);
+ if (const InvokeInst *II =
+ dyn_cast<InvokeInst>(BlocksToExtract[i]->getTerminator()))
+ BlocksToExtractVec.push_back(II->getUnwindDest());
+ ExtractBasicBlock(BlocksToExtractVec);
+ }
return !BlocksToExtract.empty();
}
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
deleted file mode 100644
index 494cee20f2..0000000000
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ /dev/null
@@ -1,547 +0,0 @@
-//===- LowerSetJmp.cpp - Code pertaining to lowering set/long jumps -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the lowering of setjmp and longjmp to use the
-// LLVM invoke and unwind instructions as necessary.
-//
-// Lowering of longjmp is fairly trivial. We replace the call with a
-// call to the LLVM library function "__llvm_sjljeh_throw_longjmp()".
-// This unwinds the stack for us calling all of the destructors for
-// objects allocated on the stack.
-//
-// At a setjmp call, the basic block is split and the setjmp removed.
-// The calls in a function that have a setjmp are converted to invoke
-// where the except part checks to see if it's a longjmp exception and,
-// if so, if it's handled in the function. If it is, then it gets the
-// value returned by the longjmp and goes to where the basic block was
-// split. Invoke instructions are handled in a similar fashion with the
-// original except block being executed if it isn't a longjmp except
-// that is handled by that function.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// FIXME: This pass doesn't deal with PHI statements just yet. That is,
-// we expect this to occur before SSAification is done. This would seem
-// to make sense, but in general, it might be a good idea to make this
-// pass invokable via the "opt" command at will.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "lowersetjmp"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include <map>
-using namespace llvm;
-
-STATISTIC(LongJmpsTransformed, "Number of longjmps transformed");
-STATISTIC(SetJmpsTransformed , "Number of setjmps transformed");
-STATISTIC(CallsTransformed , "Number of calls invokified");
-STATISTIC(InvokesTransformed , "Number of invokes modified");
-
-namespace {
- //===--------------------------------------------------------------------===//
- // LowerSetJmp pass implementation.
- class LowerSetJmp : public ModulePass, public InstVisitor<LowerSetJmp> {
- // LLVM library functions...
- Constant *InitSJMap; // __llvm_sjljeh_init_setjmpmap
- Constant *DestroySJMap; // __llvm_sjljeh_destroy_setjmpmap
- Constant *AddSJToMap; // __llvm_sjljeh_add_setjmp_to_map
- Constant *ThrowLongJmp; // __llvm_sjljeh_throw_longjmp
- Constant *TryCatchLJ; // __llvm_sjljeh_try_catching_longjmp_exception
- Constant *IsLJException; // __llvm_sjljeh_is_longjmp_exception
- Constant *GetLJValue; // __llvm_sjljeh_get_longjmp_value
-
- typedef std::pair<SwitchInst*, CallInst*> SwitchValuePair;
-
- // Keep track of those basic blocks reachable via a depth-first search of
- // the CFG from a setjmp call. We only need to transform those "call" and
- // "invoke" instructions that are reachable from the setjmp call site.
- std::set<BasicBlock*> DFSBlocks;
-
- // The setjmp map is going to hold information about which setjmps
- // were called (each setjmp gets its own number) and with which
- // buffer it was called.
- std::map<Function*, AllocaInst*> SJMap;
-
- // The rethrow basic block map holds the basic block to branch to if
- // the exception isn't handled in the current function and needs to
- // be rethrown.
- std::map<const Function*, BasicBlock*> RethrowBBMap;
-
- // The preliminary basic block map holds a basic block that grabs the
- // exception and determines if it's handled by the current function.
- std::map<const Function*, BasicBlock*> PrelimBBMap;
-
- // The switch/value map holds a switch inst/call inst pair. The
- // switch inst controls which handler (if any) gets called and the
- // value is the value returned to that handler by the call to
- // __llvm_sjljeh_get_longjmp_value.
- std::map<const Function*, SwitchValuePair> SwitchValMap;
-
- // A map of which setjmps we've seen so far in a function.
- std::map<const Function*, unsigned> SetJmpIDMap;
-
- AllocaInst* GetSetJmpMap(Function* Func);
- BasicBlock* GetRethrowBB(Function* Func);
- SwitchValuePair GetSJSwitch(Function* Func, BasicBlock* Rethrow);
-
- void TransformLongJmpCall(CallInst* Inst);
- void TransformSetJmpCall(CallInst* Inst);
-
- bool IsTransformableFunction(StringRef Name);
- public:
- static char ID; // Pass identification, replacement for typeid
- LowerSetJmp() : ModulePass(ID) {
- initializeLowerSetJmpPass(*PassRegistry::getPassRegistry());
- }
-
- void visitCallInst(CallInst& CI);
- void visitInvokeInst(InvokeInst& II);
- void visitReturnInst(ReturnInst& RI);
- void visitUnwindInst(UnwindInst& UI);
-
- bool runOnModule(Module& M);
- bool doInitialization(Module& M);
- };
-} // end anonymous namespace
-
-char LowerSetJmp::ID = 0;
-INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false)
-
-// run - Run the transformation on the program. We grab the function
-// prototypes for longjmp and setjmp. If they are used in the program,
-// then we can go directly to the places they're at and transform them.
-bool LowerSetJmp::runOnModule(Module& M) {
- bool Changed = false;
-
- // These are what the functions are called.
- Function* SetJmp = M.getFunction("llvm.setjmp");
- Function* LongJmp = M.getFunction("llvm.longjmp");
-
- // This program doesn't have longjmp and setjmp calls.
- if ((!LongJmp || LongJmp->use_empty()) &&
- (!SetJmp || SetJmp->use_empty())) return false;
-
- // Initialize some values and functions we'll need to transform the
- // setjmp/longjmp functions.
- doInitialization(M);
-
- if (SetJmp) {
- for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end();
- B != E; ++B) {
- BasicBlock* BB = cast<Instruction>(*B)->getParent();
- for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks),
- E = df_ext_end(BB, DFSBlocks); I != E; ++I)
- /* empty */;
- }
-
- while (!SetJmp->use_empty()) {
- assert(isa<CallInst>(SetJmp->use_back()) &&
- "User of setjmp intrinsic not a call?");
- TransformSetJmpCall(cast<CallInst>(SetJmp->use_back()));
- Changed = true;
- }
- }
-
- if (LongJmp)
- while (!LongJmp->use_empty()) {
- assert(isa<CallInst>(LongJmp->use_back()) &&
- "User of longjmp intrinsic not a call?");
- TransformLongJmpCall(cast<CallInst>(LongJmp->use_back()));
- Changed = true;
- }
-
- // Now go through the affected functions and convert calls and invokes
- // to new invokes...
- for (std::map<Function*, AllocaInst*>::iterator
- B = SJMap.begin(), E = SJMap.end(); B != E; ++B) {
- Function* F = B->first;
- for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
- for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) {
- visit(*IB++);
- if (IB != BB->end() && IB->getParent() != BB)
- break; // The next instruction got moved to a different block!
- }
- }
-
- DFSBlocks.clear();
- SJMap.clear();
- RethrowBBMap.clear();
- PrelimBBMap.clear();
- SwitchValMap.clear();
- SetJmpIDMap.clear();
-
- return Changed;
-}
-
-// doInitialization - For the lower long/setjmp pass, this ensures that a
-// module contains a declaration for the intrisic functions we are going
-// to call to convert longjmp and setjmp calls.
-//
-// This function is always successful, unless it isn't.
-bool LowerSetJmp::doInitialization(Module& M)
-{
- Type *SBPTy = Type::getInt8PtrTy(M.getContext());
- Type *SBPPTy = PointerType::getUnqual(SBPTy);
-
- // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for
- // a description of the following library functions.
-
- // void __llvm_sjljeh_init_setjmpmap(void**)
- InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap",
- Type::getVoidTy(M.getContext()),
- SBPPTy, (Type *)0);
- // void __llvm_sjljeh_destroy_setjmpmap(void**)
- DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap",
- Type::getVoidTy(M.getContext()),
- SBPPTy, (Type *)0);
-
- // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned)
- AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map",
- Type::getVoidTy(M.getContext()),
- SBPPTy, SBPTy,
- Type::getInt32Ty(M.getContext()),
- (Type *)0);
-
- // void __llvm_sjljeh_throw_longjmp(int*, int)
- ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp",
- Type::getVoidTy(M.getContext()), SBPTy,
- Type::getInt32Ty(M.getContext()),
- (Type *)0);
-
- // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **)
- TryCatchLJ =
- M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception",
- Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0);
-
- // bool __llvm_sjljeh_is_longjmp_exception()
- IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception",
- Type::getInt1Ty(M.getContext()),
- (Type *)0);
-
- // int __llvm_sjljeh_get_longjmp_value()
- GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value",
- Type::getInt32Ty(M.getContext()),
- (Type *)0);
- return true;
-}
-
-// IsTransformableFunction - Return true if the function name isn't one
-// of the ones we don't want transformed. Currently, don't transform any
-// "llvm.{setjmp,longjmp}" functions and none of the setjmp/longjmp error
-// handling functions (beginning with __llvm_sjljeh_...they don't throw
-// exceptions).
-bool LowerSetJmp::IsTransformableFunction(StringRef Name) {
- return !Name.startswith("__llvm_sjljeh_");
-}
-
-// TransformLongJmpCall - Transform a longjmp call into a call to the
-// internal __llvm_sjljeh_throw_longjmp function. It then takes care of
-// throwing the exception for us.
-void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
-{
- Type* SBPTy = Type::getInt8PtrTy(Inst->getContext());
-
- // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the
- // same parameters as "longjmp", except that the buffer is cast to a
- // char*. It returns "void", so it doesn't need to replace any of
- // Inst's uses and doesn't get a name.
- CastInst* CI =
- new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
- Value *Args[] = { CI, Inst->getArgOperand(1) };
- CallInst::Create(ThrowLongJmp, Args, "", Inst);
-
- SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
-
- // If the function has a setjmp call in it (they are transformed first)
- // we should branch to the basic block that determines if this longjmp
- // is applicable here. Otherwise, issue an unwind.
- if (SVP.first)
- BranchInst::Create(SVP.first->getParent(), Inst);
- else
- new UnwindInst(Inst->getContext(), Inst);
-
- // Remove all insts after the branch/unwind inst. Go from back to front to
- // avoid replaceAllUsesWith if possible.
- BasicBlock *BB = Inst->getParent();
- Instruction *Removed;
- do {
- Removed = &BB->back();
- // If the removed instructions have any users, replace them now.
- if (!Removed->use_empty())
- Removed->replaceAllUsesWith(UndefValue::get(Removed->getType()));
- Removed->eraseFromParent();
- } while (Removed != Inst);
-
- ++LongJmpsTransformed;
-}
-
-// GetSetJmpMap - Retrieve (create and initialize, if necessary) the
-// setjmp map. This map is going to hold information about which setjmps
-// were called (each setjmp gets its own number) and with which buffer it
-// was called. There can be only one!
-AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func)
-{
- if (SJMap[Func]) return SJMap[Func];
-
- // Insert the setjmp map initialization before the first instruction in
- // the function.
- Instruction* Inst = Func->getEntryBlock().begin();
- assert(Inst && "Couldn't find even ONE instruction in entry block!");
-
- // Fill in the alloca and call to initialize the SJ map.
- Type *SBPTy =
- Type::getInt8PtrTy(Func->getContext());
- AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst);
- CallInst::Create(InitSJMap, Map, "", Inst);
- return SJMap[Func] = Map;
-}
-
-// GetRethrowBB - Only one rethrow basic block is needed per function.
-// If this is a longjmp exception but not handled in this block, this BB
-// performs the rethrow.
-BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func)
-{
- if (RethrowBBMap[Func]) return RethrowBBMap[Func];
-
- // The basic block we're going to jump to if we need to rethrow the
- // exception.
- BasicBlock* Rethrow =
- BasicBlock::Create(Func->getContext(), "RethrowExcept", Func);
-
- // Fill in the "Rethrow" BB with a call to rethrow the exception. This
- // is the last instruction in the BB since at this point the runtime
- // should exit this function and go to the next function.
- new UnwindInst(Func->getContext(), Rethrow);
- return RethrowBBMap[Func] = Rethrow;
-}
-
-// GetSJSwitch - Return the switch statement that controls which handler
-// (if any) gets called and the value returned to that handler.
-LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,
- BasicBlock* Rethrow)
-{
- if (SwitchValMap[Func].first) return SwitchValMap[Func];
-
- BasicBlock* LongJmpPre =
- BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func);
-
- // Keep track of the preliminary basic block for some of the other
- // transformations.
- PrelimBBMap[Func] = LongJmpPre;
-
- // Grab the exception.
- CallInst* Cond = CallInst::Create(IsLJException, "IsLJExcept", LongJmpPre);
-
- // The "decision basic block" gets the number associated with the
- // setjmp call returning to switch on and the value returned by
- // longjmp.
- BasicBlock* DecisionBB =
- BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func);
-
- BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre);
-
- // Fill in the "decision" basic block.
- CallInst* LJVal = CallInst::Create(GetLJValue, "LJVal", DecisionBB);
- CallInst* SJNum = CallInst::Create(TryCatchLJ, GetSetJmpMap(Func), "SJNum",
- DecisionBB);
-
- SwitchInst* SI = SwitchInst::Create(SJNum, Rethrow, 0, DecisionBB);
- return SwitchValMap[Func] = SwitchValuePair(SI, LJVal);
-}
-
-// TransformSetJmpCall - The setjmp call is a bit trickier to transform.
-// We're going to convert all setjmp calls to nops. Then all "call" and
-// "invoke" instructions in the function are converted to "invoke" where
-// the "except" branch is used when returning from a longjmp call.
-void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
-{
- BasicBlock* ABlock = Inst->getParent();
- Function* Func = ABlock->getParent();
-
- // Add this setjmp to the setjmp map.
- Type* SBPTy =
- Type::getInt8PtrTy(Inst->getContext());
- CastInst* BufPtr =
- new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst);
- Value *Args[] = {
- GetSetJmpMap(Func), BufPtr,
- ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
- };
- CallInst::Create(AddSJToMap, Args, "", Inst);
-
- // We are guaranteed that there are no values live across basic blocks
- // (because we are "not in SSA form" yet), but there can still be values live
- // in basic blocks. Because of this, splitting the setjmp block can cause
- // values above the setjmp to not dominate uses which are after the setjmp
- // call. For all of these occasions, we must spill the value to the stack.
- //
- std::set<Instruction*> InstrsAfterCall;
-
- // The call is probably very close to the end of the basic block, for the
- // common usage pattern of: 'if (setjmp(...))', so keep track of the
- // instructions after the call.
- for (BasicBlock::iterator I = ++BasicBlock::iterator(Inst), E = ABlock->end();
- I != E; ++I)
- InstrsAfterCall.insert(I);
-
- for (BasicBlock::iterator II = ABlock->begin();
- II != BasicBlock::iterator(Inst); ++II)
- // Loop over all of the uses of instruction. If any of them are after the
- // call, "spill" the value to the stack.
- for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
- UI != E; ++UI) {
- User *U = *UI;
- if (cast<Instruction>(U)->getParent() != ABlock ||
- InstrsAfterCall.count(cast<Instruction>(U))) {
- DemoteRegToStack(*II);
- break;
- }
- }
- InstrsAfterCall.clear();
-
- // Change the setjmp call into a branch statement. We'll remove the
- // setjmp call in a little bit. No worries.
- BasicBlock* SetJmpContBlock = ABlock->splitBasicBlock(Inst);
- assert(SetJmpContBlock && "Couldn't split setjmp BB!!");
-
- SetJmpContBlock->setName(ABlock->getName()+"SetJmpCont");
-
- // Add the SetJmpContBlock to the set of blocks reachable from a setjmp.
- DFSBlocks.insert(SetJmpContBlock);
-
- // This PHI node will be in the new block created from the
- // splitBasicBlock call.
- PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2,
- "SetJmpReturn", Inst);
-
- // Coming from a call to setjmp, the return is 0.
- PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())),
- ABlock);
-
- // Add the case for this setjmp's number...
- SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func));
- SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()),
- SetJmpIDMap[Func] - 1),
- SetJmpContBlock);
-
- // Value coming from the handling of the exception.
- PHI->addIncoming(SVP.second, SVP.second->getParent());
-
- // Replace all uses of this instruction with the PHI node created by
- // the eradication of setjmp.
- Inst->replaceAllUsesWith(PHI);
- Inst->eraseFromParent();
-
- ++SetJmpsTransformed;
-}
-
-// visitCallInst - This converts all LLVM call instructions into invoke
-// instructions. The except part of the invoke goes to the "LongJmpBlkPre"
-// that grabs the exception and proceeds to determine if it's a longjmp
-// exception or not.
-void LowerSetJmp::visitCallInst(CallInst& CI)
-{
- if (CI.getCalledFunction())
- if (!IsTransformableFunction(CI.getCalledFunction()->getName()) ||
- CI.getCalledFunction()->isIntrinsic()) return;
-
- BasicBlock* OldBB = CI.getParent();
-
- // If not reachable from a setjmp call, don't transform.
- if (!DFSBlocks.count(OldBB)) return;
-
- BasicBlock* NewBB = OldBB->splitBasicBlock(CI);
- assert(NewBB && "Couldn't split BB of \"call\" instruction!!");
- DFSBlocks.insert(NewBB);
- NewBB->setName("Call2Invoke");
-
- Function* Func = OldBB->getParent();
-
- // Construct the new "invoke" instruction.
- TerminatorInst* Term = OldBB->getTerminator();
- CallSite CS(&CI);
- std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
- InvokeInst* II =
- InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
- Params, CI.getName(), Term);
- II->setCallingConv(CI.getCallingConv());
- II->setAttributes(CI.getAttributes());
-
- // Replace the old call inst with the invoke inst and remove the call.
- CI.replaceAllUsesWith(II);
- CI.eraseFromParent();
-
- // The old terminator is useless now that we have the invoke inst.
- Term->eraseFromParent();
- ++CallsTransformed;
-}
-
-// visitInvokeInst - Converting the "invoke" instruction is fairly
-// straight-forward. The old exception part is replaced by a query asking
-// if this is a longjmp exception. If it is, then it goes to the longjmp
-// exception blocks. Otherwise, control is passed the old exception.
-void LowerSetJmp::visitInvokeInst(InvokeInst& II)
-{
- if (II.getCalledFunction())
- if (!IsTransformableFunction(II.getCalledFunction()->getName()) ||
- II.getCalledFunction()->isIntrinsic()) return;
-
- BasicBlock* BB = II.getParent();
-
- // If not reachable from a setjmp call, don't transform.
- if (!DFSBlocks.count(BB)) return;
-
- BasicBlock* ExceptBB = II.getUnwindDest();
-
- Function* Func = BB->getParent();
- BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(),
- "InvokeExcept", Func);
-
- // If this is a longjmp exception, then branch to the preliminary BB of
- // the longjmp exception handling. Otherwise, go to the old exception.
- CallInst* IsLJExcept = CallInst::Create(IsLJException, "IsLJExcept",
- NewExceptBB);
-
- BranchInst::Create(PrelimBBMap[Func], ExceptBB, IsLJExcept, NewExceptBB);
-
- II.setUnwindDest(NewExceptBB);
- ++InvokesTransformed;
-}
-
-// visitReturnInst - We want to destroy the setjmp map upon exit from the
-// function.
-void LowerSetJmp::visitReturnInst(ReturnInst &RI) {
- Function* Func = RI.getParent()->getParent();
- CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI);
-}
-
-// visitUnwindInst - We want to destroy the setjmp map upon exit from the
-// function.
-void LowerSetJmp::visitUnwindInst(UnwindInst &UI) {
- Function* Func = UI.getParent()->getParent();
- CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &UI);
-}
-
-ModulePass *llvm::createLowerSetJmpPass() {
- return new LowerSetJmp();
-}
-
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index f3d7e46ad2..0b01c3822f 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -305,10 +305,14 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
// Check special state that is a part of some instructions.
if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
- LI->getAlignment() == cast<LoadInst>(I2)->getAlignment();
+ LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() &&
+ LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope();
if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
- SI->getAlignment() == cast<StoreInst>(I2)->getAlignment();
+ SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() &&
+ SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope();
if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
if (const CallInst *CI = dyn_cast<CallInst>(I1))
@@ -317,22 +321,22 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
- if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
- return false;
- for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
- if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i])
- return false;
- return true;
- }
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) {
- if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices())
- return false;
- for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
- if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i])
- return false;
- return true;
- }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
+ return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
+ return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(I1))
+ return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
+ CXI->getOrdering() == cast<AtomicCmpXchgInst>(I2)->getOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope();
return true;
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
new file mode 100644
index 0000000000..8fdfd72237
--- /dev/null
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -0,0 +1,343 @@
+//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PassManagerBuilder class, which is used to set up a
+// "standard" optimization sequence suitable for languages like C and C++.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+
+#include "llvm-c/Transforms/PassManagerBuilder.h"
+
+#include "llvm/PassManager.h"
+#include "llvm/DefaultPasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+
+PassManagerBuilder::PassManagerBuilder() {
+ OptLevel = 2;
+ SizeLevel = 0;
+ LibraryInfo = 0;
+ Inliner = 0;
+ DisableSimplifyLibCalls = false;
+ DisableUnitAtATime = false;
+ DisableUnrollLoops = false;
+}
+
+PassManagerBuilder::~PassManagerBuilder() {
+ delete LibraryInfo;
+ delete Inliner;
+}
+
+/// Set of global extensions, automatically added as part of the standard set.
+static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
+ PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+
+void PassManagerBuilder::addGlobalExtension(
+ PassManagerBuilder::ExtensionPointTy Ty,
+ PassManagerBuilder::ExtensionFn Fn) {
+ GlobalExtensions->push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
+ Extensions.push_back(std::make_pair(Ty, Fn));
+}
+
+void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
+ PassManagerBase &PM) const {
+ for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
+ if ((*GlobalExtensions)[i].first == ETy)
+ (*GlobalExtensions)[i].second(*this, PM);
+ for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
+ if (Extensions[i].first == ETy)
+ Extensions[i].second(*this, PM);
+}
+
+void
+PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+}
+
+void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
+ addExtensionsToPM(EP_EarlyAsPossible, FPM);
+
+ // Add LibraryInfo if we have some.
+ if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo));
+
+ if (OptLevel == 0) return;
+
+ addInitialAliasAnalysisPasses(FPM);
+
+ FPM.add(createCFGSimplificationPass());
+ FPM.add(createScalarReplAggregatesPass());
+ FPM.add(createEarlyCSEPass());
+ FPM.add(createLowerExpectIntrinsicPass());
+}
+
+void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
+ // If all optimizations are disabled, just run the always-inline pass.
+ if (OptLevel == 0) {
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = 0;
+ }
+ return;
+ }
+
+ // Add LibraryInfo if we have some.
+ if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo));
+
+ addInitialAliasAnalysisPasses(MPM);
+
+ if (!DisableUnitAtATime) {
+ MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
+
+ MPM.add(createIPSCCPPass()); // IP SCCP
+ MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
+
+ MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+ MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+ }
+
+ // Start of CallGraph SCC passes.
+ if (!DisableUnitAtATime)
+ MPM.add(createPruneEHPass()); // Remove dead EH info
+ if (Inliner) {
+ MPM.add(Inliner);
+ Inliner = 0;
+ }
+ if (!DisableUnitAtATime)
+ MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
+ if (OptLevel > 2)
+ MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+
+ // Start of function pass.
+ // Break up aggregate allocas, using SSAUpdater.
+ MPM.add(createScalarReplAggregatesPass(-1, false));
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ if (!DisableSimplifyLibCalls)
+ MPM.add(createSimplifyLibCallsPass()); // Library Call Optimizations
+ MPM.add(createJumpThreadingPass()); // Thread jumps.
+ MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createInstructionCombiningPass()); // Combine silly seq's
+
+ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createReassociatePass()); // Reassociate expressions
+ MPM.add(createLoopRotatePass()); // Rotate Loop
+ MPM.add(createLICMPass()); // Hoist loop invariants
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
+ MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ MPM.add(createLoopDeletionPass()); // Delete dead loops
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass()); // Unroll small loops
+ addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
+
+ if (OptLevel > 1)
+ MPM.add(createGVNPass()); // Remove redundancies
+ MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
+ MPM.add(createSCCPPass()); // Constant prop with SCCP
+
+ // Run instcombine after redundancy elimination to exploit opportunities
+ // opened up by them.
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createJumpThreadingPass()); // Thread jumps
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+
+ addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
+
+ MPM.add(createAggressiveDCEPass()); // Delete dead instructions
+ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createInstructionCombiningPass()); // Clean up after everything.
+
+ if (!DisableUnitAtATime) {
+ // FIXME: We shouldn't bother with this anymore.
+ MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
+
+ // GlobalOpt already deletes dead functions and globals, at -O3 try a
+ // late pass of GlobalDCE. It is capable of deleting dead cycles.
+ if (OptLevel > 2)
+ MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
+
+ if (OptLevel > 1)
+ MPM.add(createConstantMergePass()); // Merge dup global constants
+ }
+}
+
+void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
+ bool Internalize,
+ bool RunInliner) { // Appends the link-time pass pipeline to PM, in order.
+ // Provide AliasAnalysis services for optimizations.
+ addInitialAliasAnalysisPasses(PM);
+
+ // Now that composite has been compiled, scan through the module, looking
+ // for a main function. If main is defined, mark all other functions
+ // internal. Only done when the caller passes Internalize.
+ if (Internalize)
+ PM.add(createInternalizePass(true)); // NOTE(review): 'true' presumably selects the keep-main mode described above; confirm.
+
+ // Propagate constants at call sites into the functions they call. This
+ // opens opportunities for globalopt (and inlining) by substituting function
+ // pointers passed as arguments to direct uses of functions.
+ PM.add(createIPSCCPPass());
+
+ // Now that we internalized some globals, see if we can hack on them!
+ PM.add(createGlobalOptimizerPass());
+
+ // Linking modules together can lead to duplicated global constants, only
+ // keep one copy of each constant.
+ PM.add(createConstantMergePass());
+
+ // Remove unused arguments from functions.
+ PM.add(createDeadArgEliminationPass());
+
+ // Reduce the code after globalopt and ipsccp. Both can open up significant
+ // simplification opportunities, and both can propagate functions through
+ // function pointers. When this happens, we often have to resolve varargs
+ // calls, etc, so let instcombine do this.
+ PM.add(createInstructionCombiningPass());
+
+ // Inline small functions (only when the caller asked for the inliner).
+ if (RunInliner)
+ PM.add(createFunctionInliningPass());
+
+ PM.add(createPruneEHPass()); // Remove dead EH info.
+
+ // Optimize globals again if we ran the inliner.
+ if (RunInliner)
+ PM.add(createGlobalOptimizerPass());
+ PM.add(createGlobalDCEPass()); // Remove dead functions (added unconditionally).
+
+ // If we didn't decide to inline a function, check to see if we can
+ // transform it to pass arguments by value instead of by reference.
+ PM.add(createArgumentPromotionPass());
+
+ // The IPO passes may leave cruft around. Clean up after them.
+ PM.add(createInstructionCombiningPass());
+ PM.add(createJumpThreadingPass()); // Thread jumps.
+ // Break up allocas
+ PM.add(createScalarReplAggregatesPass());
+
+ // Run a few AA driven optimizations here and now, to cleanup the code.
+ PM.add(createFunctionAttrsPass()); // Add nocapture.
+ PM.add(createGlobalsModRefPass()); // IP alias analysis.
+
+ PM.add(createLICMPass()); // Hoist loop invariants.
+ PM.add(createGVNPass()); // Remove redundancies.
+ PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+ // Nuke dead stores.
+ PM.add(createDeadStoreEliminationPass());
+
+ // Cleanup and simplify the code after the scalar optimizations.
+ PM.add(createInstructionCombiningPass());
+
+ PM.add(createJumpThreadingPass()); // Thread jumps again, after the instcombine run above.
+
+ // Delete basic blocks, which optimization passes may have killed.
+ PM.add(createCFGSimplificationPass());
+
+ // Now that we have optimized the program, discard unreachable functions.
+ PM.add(createGlobalDCEPass());
+}
+
+LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void) { // C API: makes a new builder.
+ PassManagerBuilder *PMB = new PassManagerBuilder(); // Heap-allocated; LLVMPassManagerBuilderDispose deletes it.
+ return wrap(PMB); // Convert to the C handle type.
+}
+
+void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) { // C API: destroys a builder.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ delete Builder; // PMB must not be used after this call.
+}
+
+void
+LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
+ unsigned OptLevel) { // C API setter for PassManagerBuilder::OptLevel.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->OptLevel = OptLevel; // Read when passes are populated (this file tests e.g. "OptLevel > 2").
+}
+
+void
+LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
+ unsigned SizeLevel) { // C API setter for PassManagerBuilder::SizeLevel.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->SizeLevel = SizeLevel; // Only stores the value; no reader of SizeLevel is visible in this part of the file.
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) { // C API setter for PassManagerBuilder::DisableUnitAtATime.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableUnitAtATime = Value; // Tested as "!DisableUnitAtATime" before the late module-level cleanup passes are added.
+}
+
+void
+LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) { // C API setter for PassManagerBuilder::DisableUnrollLoops.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableUnrollLoops = Value; // Only stores the flag; its reader is not visible in this part of the file.
+}
+
+void
+LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
+ LLVMBool Value) { // C API setter for PassManagerBuilder::DisableSimplifyLibCalls.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->DisableSimplifyLibCalls = Value; // Only stores the flag; its reader is not visible in this part of the file.
+}
+
+void
+LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
+ unsigned Threshold) { // C API: installs a function-inlining pass built with Threshold.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ Builder->Inliner = createFunctionInliningPass(Threshold); // NOTE(review): overwrites any earlier Inliner without releasing it; confirm who owns the old pass.
+}
+
+void
+LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM) { // C API forwarder to populateFunctionPassManager.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM); // NOTE(review): assumes PM really wraps a FunctionPassManager; confirm unwrap<> checks this.
+ Builder->populateFunctionPassManager(*FPM);
+}
+
+void
+LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM) { // C API forwarder to populateModulePassManager.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ PassManagerBase *MPM = unwrap(PM); // Unlike the function-level variant, only the PassManagerBase interface is required here.
+ Builder->populateModulePassManager(*MPM);
+}
+
+void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
+ LLVMPassManagerRef PM,
+ bool Internalize,
+ bool RunInliner) { // C API forwarder to populateLTOPassManager; note plain 'bool' here, where the setters take LLVMBool.
+ PassManagerBuilder *Builder = unwrap(PMB);
+ PassManagerBase *LPM = unwrap(PM);
+ Builder->populateLTOPassManager(*LPM, Internalize, RunInliner); // Both flags are passed through unchanged.
+}
+
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index b7e63dc448..cbb80f0750 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -101,8 +101,9 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// Check to see if this function performs an unwind or calls an
// unwinding function.
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- if (CheckUnwind && isa<UnwindInst>(BB->getTerminator())) {
- // Uses unwind!
+ if (CheckUnwind && (isa<UnwindInst>(BB->getTerminator()) ||
+ isa<ResumeInst>(BB->getTerminator()))) {
+ // Uses unwind / resume!
SCCMightUnwind = true;
} else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
SCCMightReturn = true;
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 0fbaff1509..b5caa9a557 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -180,7 +180,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *STy = StructTypes[i];
- if (STy->isAnonymous() || STy->getName().empty()) continue;
+ if (STy->isLiteral() || STy->getName().empty()) continue;
if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
continue;
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index d070ccc0d6..a46d5adc0a 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -13,3 +13,11 @@ add_llvm_library(LLVMInstCombine
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
+
+add_llvm_library_dependencies(LLVMInstCombine
+ LLVMAnalysis
+ LLVMCore
+ LLVMSupport
+ LLVMTarget
+ LLVMTransformUtils
+ )
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index c6bdb08998..38082787ce 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -11,6 +11,7 @@
#define INSTCOMBINE_INSTCOMBINE_H
#include "InstCombineWorklist.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -192,6 +193,7 @@ public:
Instruction *visitExtractElementInst(ExtractElementInst &EI);
Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
Instruction *visitExtractValueInst(ExtractValueInst &EV);
+ Instruction *visitLandingPadInst(LandingPadInst &LI);
// visitInstruction - Specify what to return for unhandled instructions...
Instruction *visitInstruction(Instruction &I) { return 0; }
@@ -214,7 +216,8 @@ private:
Instruction *visitCallSite(CallSite CS);
Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD);
bool transformConstExprCastCall(CallSite CS);
- Instruction *transformCallThroughTrampoline(CallSite CS);
+ Instruction *transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 32920fabc3..5e0bfe8e26 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1174,30 +1174,31 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
((A == C && B == D) || (A == D && B == C)))
return BinaryOperator::CreateXor(A, B);
- if (Op0->hasOneUse() &&
- match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
- if (A == Op1) { // (A^B)&A -> A&(A^B)
- I.swapOperands(); // Simplify below
- std::swap(Op0, Op1);
- } else if (B == Op1) { // (A^B)&B -> B&(B^A)
- cast<BinaryOperator>(Op0)->swapOperands();
- I.swapOperands(); // Simplify below
- std::swap(Op0, Op1);
+ // A&(A^B) => A & ~B
+ {
+ Value *tmpOp0 = Op0;
+ Value *tmpOp1 = Op1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1 ) {
+ tmpOp1 = Op0;
+ tmpOp0 = Op1;
+ // Simplify below
+ }
}
- }
- if (Op1->hasOneUse() &&
- match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
- if (B == Op0) { // B&(A^B) -> B&(B^A)
- cast<BinaryOperator>(Op1)->swapOperands();
- std::swap(A, B);
+ if (tmpOp1->hasOneUse() &&
+ match(tmpOp1, m_Xor(m_Value(A), m_Value(B)))) {
+ if (B == tmpOp0) {
+ std::swap(A, B);
+ }
+ // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
+ // A is originally -1 (or a vector of -1 and undefs), then we enter
+ // an endless loop. By checking that A is non-constant we ensure that
+ // we will never get to the loop.
+ if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(B));
}
- // Notice that the patten (A&(~B)) is actually (A&(-1^B)), so if
- // A is originally -1 (or a vector of -1 and undefs), then we enter
- // an endless loop. By checking that A is non-constant we ensure that
- // we will never get to the loop.
- if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
- return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
}
// (A&((~A)|B)) -> A&B
@@ -2227,14 +2228,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (A == Op1) // (B|A)^B == (A|B)^B
std::swap(A, B);
if (B == Op1) // (A|B)^B == A & ~B
- return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1));
} else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
Op0I->hasOneUse()){
if (A == Op1) // (A&B)^A -> (B&A)^A
std::swap(A, B);
if (B == Op1 && // (B&A)^A == ~B & A
!isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
- return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1);
}
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c33dec1740..c7b3ff8504 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -655,15 +654,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (ExtractedElts[Idx] == 0) {
ExtractedElts[Idx] =
- Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
- ConstantInt::get(Type::getInt32Ty(II->getContext()),
- Idx&15, false), "tmp");
+ Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ Builder->getInt32(Idx&15));
}
// Insert this value into the result vector.
Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
- ConstantInt::get(Type::getInt32Ty(II->getContext()),
- i, false), "tmp");
+ Builder->getInt32(i));
}
return CastInst::Create(Instruction::BitCast, Result, CI.getType());
}
@@ -732,9 +729,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
- // If the stack restore is in a return/unwind block and if there are no
- // allocas or calls between the restore and the return, nuke the restore.
- if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ // If the stack restore is in a return, resume, or unwind block and if there
+ // are no allocas or calls between the restore and the return, nuke the
+ // restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) ||
+ isa<UnwindInst>(TI)))
return EraseInstFromFunction(CI);
break;
}
@@ -819,6 +818,83 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
return Simplifier.NewInstruction;
}
+static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { // Returns the single init.trampoline user of TrampMem, or 0.
+ // Strip off at most one level of pointer casts, looking for an alloca. This
+ // is good enough in practice and simpler than handling any number of casts.
+ Value *Underlying = TrampMem->stripPointerCasts();
+ if (Underlying != TrampMem &&
+ (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem))
+ return 0; // Something was stripped, but TrampMem is not its sole user.
+ if (!isa<AllocaInst>(Underlying))
+ return 0; // The trampoline memory is not an alloca.
+
+ IntrinsicInst *InitTrampoline = 0; // The one init.trampoline user, once found.
+ for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end();
+ I != E; I++) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I);
+ if (!II)
+ return 0; // A user that is not an IntrinsicInst: give up.
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
+ if (InitTrampoline)
+ // More than one init_trampoline writes to this value. Give up.
+ return 0;
+ InitTrampoline = II;
+ continue;
+ }
+ if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
+ // Allow any number of calls to adjust.trampoline.
+ continue;
+ return 0; // Any other intrinsic user: give up.
+ }
+
+ // No call to init.trampoline found.
+ if (!InitTrampoline)
+ return 0;
+
+ // Check that the alloca is being used in the expected way.
+ if (InitTrampoline->getOperand(0) != TrampMem)
+ return 0; // TrampMem is used by init.trampoline, but not as operand 0.
+
+ return InitTrampoline;
+}
+
+static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
+ Value *TrampMem) {
+ // Visit all the previous instructions in the basic block, and try to find an
+ // init.trampoline which has a direct path to the adjust.trampoline.
+ for (BasicBlock::iterator I = AdjustTramp,
+ E = AdjustTramp->getParent()->begin(); I != E; ) {
+ Instruction *Inst = --I; // Step backwards; the decrement lives here, not in the loop header.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
+ II->getOperand(0) == TrampMem)
+ return II; // Found an init.trampoline whose operand 0 is TrampMem.
+ if (Inst->mayWriteToMemory())
+ return 0; // An instruction that may write memory lies in between: stop searching.
+ }
+ return 0; // Reached the start of the block without a match.
+}
+
+// Given a call to llvm.adjust.trampoline, find and return the corresponding
+// call to llvm.init.trampoline if the call to the trampoline can be optimized
+// to a direct call to a function. Otherwise return NULL. Pointer casts on
+// Callee are stripped before it is inspected.
+static IntrinsicInst *FindInitTrampoline(Value *Callee) {
+ Callee = Callee->stripPointerCasts();
+ IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
+ if (!AdjustTramp ||
+ AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
+ return 0; // Callee is not (a cast of) an adjust.trampoline call.
+
+ Value *TrampMem = AdjustTramp->getOperand(0); // Operand 0 of adjust.trampoline.
+
+ if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem)) // First try: search the users of TrampMem.
+ return IT;
+ if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) // Fallback: scan backwards in the same basic block.
+ return IT;
+ return 0; // Neither search found a matching init.trampoline.
+}
+
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
@@ -878,10 +954,8 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
return EraseInstFromFunction(*CS.getInstruction());
}
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
- if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
- if (In->getIntrinsicID() == Intrinsic::init_trampoline)
- return transformCallThroughTrampoline(CS);
+ if (IntrinsicInst *II = FindInitTrampoline(Callee))
+ return transformCallThroughTrampoline(CS, II);
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
@@ -1067,7 +1141,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
} else {
Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
false, ParamTy, false);
- Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));
+ Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy));
}
// Add any parameter attributes.
@@ -1093,7 +1167,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Must promote to pass through va_arg area!
Instruction::CastOps opcode =
CastInst::getCastOpcode(*AI, false, PTy, false);
- Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
} else {
Args.push_back(*AI);
}
@@ -1137,13 +1211,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!NV->getType()->isVoidTy()) {
Instruction::CastOps opcode =
CastInst::getCastOpcode(NC, false, OldRetTy, false);
- NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy);
NC->setDebugLoc(Caller->getDebugLoc());
// If this is an invoke instruction, we should insert it after the first
// non-phi, instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
+ BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call.
@@ -1162,10 +1236,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return true;
}
-// transformCallThroughTrampoline - Turn a call to a function created by the
-// init_trampoline intrinsic into a direct call to the underlying function.
+// transformCallThroughTrampoline - Turn a call to a function created by
+// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
+// underlying function.
//
-Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+Instruction *
+InstCombiner::transformCallThroughTrampoline(CallSite CS,
+ IntrinsicInst *Tramp) {
Value *Callee = CS.getCalledValue();
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
@@ -1176,8 +1253,8 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
if (Attrs.hasAttrSomewhere(Attribute::Nest))
return 0;
- IntrinsicInst *Tramp =
- cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+ assert(Tramp &&
+ "transformCallThroughTrampoline called with incorrect CallSite.");
Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
PointerType *NestFPTy = cast<PointerType>(NestF->getType());
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f99e457482..f10e48abf1 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
@@ -121,13 +122,13 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
} else {
Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
// Insert before the alloca, not before the cast.
- Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
+ Amt = AllocaBuilder.CreateMul(Amt, NumElements);
}
if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
Offset, true);
- Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
+ Amt = AllocaBuilder.CreateAdd(Amt, Off);
}
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
@@ -456,7 +457,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
if (DestTy->getScalarSizeInBits() == 1) {
Constant *One = ConstantInt::get(Src->getType(), 1);
- Src = Builder->CreateAnd(Src, One, "tmp");
+ Src = Builder->CreateAnd(Src, One);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
@@ -518,7 +519,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
In->getType()->getScalarSizeInBits()-1);
In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
if (In->getType() != CI.getType())
- In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
+ In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/);
if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
Constant *One = ConstantInt::get(In->getType(), 1);
@@ -572,7 +573,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One, "tmp");
+ In = Builder->CreateXor(In, One);
}
if (CI.getType() == In->getType())
@@ -820,7 +821,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
AndValue));
}
if (SrcSize > DstSize) {
- Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
+ Value *Trunc = Builder->CreateTrunc(A, CI.getType());
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
return BinaryOperator::CreateAnd(Trunc,
ConstantInt::get(Trunc->getType(),
@@ -867,7 +868,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *TI0 = TI->getOperand(0);
if (TI0->getType() == CI.getType()) {
Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
- Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
+ Value *NewAnd = Builder->CreateAnd(TI0, ZC);
return BinaryOperator::CreateXor(NewAnd, ZC);
}
}
@@ -900,7 +901,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
Op0->getType()->getScalarSizeInBits()-1);
Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
if (In->getType() != CI.getType())
- In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
+ In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/);
if (Pred == ICmpInst::ICMP_SGT)
In = Builder->CreateNot(In, In->getName()+".not");
@@ -1306,13 +1307,13 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
TD->getPointerSizeInBits()) {
Value *P = Builder->CreateTrunc(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()), "tmp");
+ TD->getIntPtrType(CI.getContext()));
return new IntToPtrInst(P, CI.getType());
}
if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
TD->getPointerSizeInBits()) {
Value *P = Builder->CreateZExt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()), "tmp");
+ TD->getIntPtrType(CI.getContext()));
return new IntToPtrInst(P, CI.getType());
}
}
@@ -1359,9 +1360,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// and bitcast the result. This eliminates one bitcast, potentially
// two.
Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase,
- NewIndices.begin(), NewIndices.end()) :
- Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
+ Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
+ Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
if (isa<BitCastInst>(CI))
@@ -1382,14 +1382,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
if (TD) {
if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()),
- "tmp");
+ TD->getIntPtrType(CI.getContext()));
return new TruncInst(P, CI.getType());
}
if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()),
- "tmp");
+ TD->getIntPtrType(CI.getContext()));
return new ZExtInst(P, CI.getType());
}
}
@@ -1693,7 +1691,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
- return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end());
+ return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b8ce4b7eb9..bb1cbfade3 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
@@ -78,7 +79,7 @@ static bool HasSubOverflow(ConstantInt *Result,
bool IsSigned) {
if (!IsSigned)
return Result->getValue().ugt(In1->getValue());
-
+
if (In2->isNegative())
return Result->getValue().slt(In1->getValue());
@@ -128,7 +129,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
// True if LHS u> RHS and RHS == high-bit-mask - 1
TrueIfSigned = true;
return RHS->isMaxValue(true);
- case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_UGE:
// True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
TrueIfSigned = true;
return RHS->getValue().isSignBit();
@@ -143,7 +144,7 @@ static bool isHighOnes(const ConstantInt *CI) {
return (~CI->getValue() + 1).isPowerOf2();
}
-/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
+/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a
/// set of known zero and one bits, compute the maximum and minimum values that
/// could have the specified known zero and known one bits, returning them in
/// min/max.
@@ -160,7 +161,7 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
// bit if it is unknown.
Min = KnownOne;
Max = KnownOne|UnknownBits;
-
+
if (UnknownBits.isNegative()) { // Sign bit is unknown
Min.setBit(Min.getBitWidth()-1);
Max.clearBit(Max.getBitWidth()-1);
@@ -179,7 +180,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
KnownZero.getBitWidth() == Max.getBitWidth() &&
"Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
APInt UnknownBits = ~(KnownZero|KnownOne);
-
+
// The minimum value is when the unknown bits are all zeros.
Min = KnownOne;
// The maximum value is when the unknown bits are all ones.
@@ -201,10 +202,10 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
// We need TD information to know the pointer size unless this is inbounds.
if (!GEP->isInBounds() && TD == 0) return 0;
-
+
ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
if (Init == 0 || Init->getNumOperands() > 1024) return 0;
-
+
// There are many forms of this optimization we can handle, for now, just do
// the simple index into a single-dimensional array.
//
@@ -219,15 +220,15 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// type they index. Collect the indices. This is typically for arrays of
// structs.
SmallVector<unsigned, 4> LaterIndices;
-
+
Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (Idx == 0) return 0; // Variable index.
-
+
uint64_t IdxVal = Idx->getZExtValue();
if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index.
-
+
if (StructType *STy = dyn_cast<StructType>(EltTy))
EltTy = STy->getElementType(IdxVal);
else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
@@ -236,14 +237,14 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
} else {
return 0; // Unknown type.
}
-
+
LaterIndices.push_back(IdxVal);
}
-
+
enum { Overdefined = -3, Undefined = -2 };
// Variables for our state machines.
-
+
// FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
// "i == 47 | i == 87", where 47 is the first index the condition is true for,
// and 87 is the second (and last) index. FirstTrueElement is -2 when
@@ -254,7 +255,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
// form "i != 47 & i != 87". Same state transitions as for true elements.
int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
-
+
/// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
/// define a state machine that triggers for ranges of values that the index
/// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
@@ -262,25 +263,25 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
/// index in the range (inclusive). We use -2 for undefined here because we
/// use relative comparisons and don't want 0-1 to match -1.
int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
-
+
// MagicBitvector - This is a magic bitvector where we set a bit if the
// comparison is true for element 'i'. If there are 64 elements or less in
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
-
-
+
+
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
Constant *Elt = Init->getOperand(i);
-
+
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty())
Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
-
+
// If the element is masked, handle it.
if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
-
+
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
CompareRHS, TD);
@@ -294,15 +295,15 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
FalseRangeEnd = i;
continue;
}
-
+
// If we can't compute the result for any of the elements, we have to give
// up evaluating the entire conditional.
if (!isa<ConstantInt>(C)) return 0;
-
+
// Otherwise, we know if the comparison is true or false for this element,
// update our state machines.
bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
-
+
// State machine for single/double/range index comparison.
if (IsTrueForElt) {
// Update the TrueElement state machine.
@@ -314,7 +315,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
SecondTrueElement = i;
else
SecondTrueElement = Overdefined;
-
+
// Update range state machine.
if (TrueRangeEnd == (int)i-1)
TrueRangeEnd = i;
@@ -331,7 +332,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
SecondFalseElement = i;
else
SecondFalseElement = Overdefined;
-
+
// Update range state machine.
if (FalseRangeEnd == (int)i-1)
FalseRangeEnd = i;
@@ -339,12 +340,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
FalseRangeEnd = Overdefined;
}
}
-
-
+
+
// If this element is in range, update our magic bitvector.
if (i < 64 && IsTrueForElt)
MagicBitvector |= 1ULL << i;
-
+
// If all of our states become overdefined, bail out early. Since the
// predicate is expensive, only check it every 8 elements. This is only
// really useful for really huge arrays.
@@ -364,20 +365,20 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
if (!GEP->isInBounds() &&
Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
-
+
// If the comparison is only true for one or two elements, emit direct
// comparisons.
if (SecondTrueElement != Overdefined) {
// None true -> false.
if (FirstTrueElement == Undefined)
return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext()));
-
+
Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
-
+
// True for one element -> 'i == 47'.
if (SecondTrueElement == Undefined)
return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
-
+
// True for two elements -> 'i == 47 | i == 72'.
Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
@@ -391,36 +392,36 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// None false -> true.
if (FirstFalseElement == Undefined)
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext()));
-
+
Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);
// False for one element -> 'i != 47'.
if (SecondFalseElement == Undefined)
return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);
-
+
// False for two elements -> 'i != 47 & i != 72'.
Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx);
Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx);
return BinaryOperator::CreateAnd(C1, C2);
}
-
+
// If the comparison can be replaced with a range comparison for the elements
// where it is true, emit the range check.
if (TrueRangeEnd != Overdefined) {
assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
-
+
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
if (FirstTrueElement) {
Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
Idx = Builder->CreateAdd(Idx, Offs);
}
-
+
Value *End = ConstantInt::get(Idx->getType(),
TrueRangeEnd-FirstTrueElement+1);
return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
}
-
+
// False range check.
if (FalseRangeEnd != Overdefined) {
assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
@@ -429,13 +430,13 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
Idx = Builder->CreateAdd(Idx, Offs);
}
-
+
Value *End = ConstantInt::get(Idx->getType(),
FalseRangeEnd-FirstFalseElement);
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
}
-
-
+
+
// If a 32-bit or 64-bit magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
@@ -451,7 +452,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
}
-
+
return 0;
}
@@ -465,11 +466,11 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
/// to generate the first by knowing that pointer arithmetic doesn't overflow.
///
/// If we can't emit an optimized form for this expression, this returns null.
-///
+///
static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
TargetData &TD = *IC.getTargetData();
gep_type_iterator GTI = gep_type_begin(GEP);
-
+
// Check to see if this gep only has a single variable index. If so, and if
// any constant indices are a multiple of its scale, then we can compute this
// in terms of the scale of the variable index. For example, if the GEP
@@ -481,7 +482,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
// Compute the aggregate offset of constant indices.
if (CI->isZero()) continue;
-
+
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
@@ -494,24 +495,24 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
break;
}
}
-
+
// If there are no variable indices, we must have a constant offset, just
// evaluate it the general way.
if (i == e) return 0;
-
+
Value *VariableIdx = GEP->getOperand(i);
// Determine the scale factor of the variable element. For example, this is
// 4 if the variable index is into an array of i32.
uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
-
+
// Verify that there are no other variable indices. If so, emit the hard way.
for (++i, ++GTI; i != e; ++i, ++GTI) {
ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!CI) return 0;
-
+
// Compute the aggregate offset of constant indices.
if (CI->isZero()) continue;
-
+
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
@@ -520,7 +521,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
Offset += Size*CI->getSExtValue();
}
}
-
+
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
@@ -535,14 +536,14 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
}
return VariableIdx;
}
-
+
// Otherwise, there is an index. The computation we will do will be modulo
// the pointer size, so get it.
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
-
+
Offset &= PtrSizeMask;
VariableScale &= PtrSizeMask;
-
+
// To do this transformation, any constant index must be a multiple of the
// variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
// but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
@@ -550,7 +551,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
int64_t NewOffs = Offset / (int64_t)VariableScale;
if (Offset != NewOffs*(int64_t)VariableScale)
return 0;
-
+
// Okay, we can do this evaluation. Start by converting the index to intptr.
Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
if (VariableIdx->getType() != IntPtrTy)
@@ -576,7 +577,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// know pointers can't overflow since the gep is inbounds. See if we can
// output an optimized form.
Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
-
+
// If not, synthesize the offset the hard way.
if (Offset == 0)
Offset = EmitGEPOffset(GEPLHS);
@@ -686,7 +687,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
}
-
+
// (X+4) == X -> false.
if (Pred == ICmpInst::ICMP_EQ)
return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
@@ -698,22 +699,22 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
-
+
// (X+1) <u X --> X >u (MAXUINT-1) --> X == 255
// (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
// (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
- Value *R =
+ Value *R =
ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
}
-
+
// (X+1) >u X --> X <u (0-1) --> X != 255
// (X+2) >u X --> X <u (0-2) --> X <u 254
// (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
-
+
unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
ConstantInt *SMax = ConstantInt::get(X->getContext(),
APInt::getSignedMaxValue(BitWidth));
@@ -726,14 +727,14 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
-
+
// (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
// (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
// (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
// (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
// (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
-
+
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
@@ -745,14 +746,14 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS) {
ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
const APInt &CmpRHSV = CmpRHS->getValue();
-
- // FIXME: If the operand types don't match the type of the divide
+
+ // FIXME: If the operand types don't match the type of the divide
// then don't attempt this transform. The code below doesn't have the
// logic to deal with a signed divide and an unsigned compare (and
- // vice versa). This is because (x /s C1) <s C2 produces different
+ // vice versa). This is because (x /s C1) <s C2 produces different
// results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
- // (x /u C1) <u C2. Simply casting the operands and result won't
- // work. :( The if statement below tests that condition and bails
+ // (x /u C1) <u C2. Simply casting the operands and result won't
+ // work. :( The if statement below tests that condition and bails
// if it finds it.
bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
@@ -768,14 +769,14 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
}
// Compute Prod = CI * DivRHS. We are essentially solving an equation
- // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
- // C2 (CI). By solving for X we can turn this into a range check
- // instead of computing a divide.
+ // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+ // C2 (CI). By solving for X we can turn this into a range check
+ // instead of computing a divide.
Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
// Determine if the product overflows by seeing if the product is
// not equal to the divide. Make sure we do the same kind of divide
- // as in the LHS instruction that we're folding.
+ // as in the LHS instruction that we're folding.
bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
@@ -785,9 +786,9 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
/// If the division is known to be exact, then there is no remainder from the
/// divide, so the covered range size is unit, otherwise it is the divisor.
ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
-
+
// Figure out the interval that is being checked. For example, a comparison
- // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+ // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
// Compute this interval based on the constants involved and the signedness of
// the compare/divide. This computes a half-open interval, keeping track of
// whether either value in the interval overflows. After analysis each
@@ -805,7 +806,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// to the same result value.
HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
}
-
+
} else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
if (CmpRHSV == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
@@ -848,7 +849,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
if (!HiOverflow)
HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
}
-
+
// Dividing by a negative swaps the condition. LT <-> GT
Pred = ICmpInst::getSwappedPredicate(Pred);
}
@@ -901,7 +902,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
ConstantInt *ShAmt) {
const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
-
+
// Check that the shift amount is in range. If not, don't perform
// undefined shifts. When the shift is visited it will be
// simplified.
@@ -909,48 +910,48 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
if (ShAmtVal >= TypeBits || ShAmtVal == 0)
return 0;
-
+
if (!ICI.isEquality()) {
// If we have an unsigned comparison and an ashr, we can't simplify this.
// Similarly for signed comparisons with lshr.
if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
return 0;
-
+
// Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
// by a power of 2. Since we already have logic to simplify these,
// transform to div and then simplify the resultant comparison.
if (Shr->getOpcode() == Instruction::AShr &&
(!Shr->isExact() || ShAmtVal == TypeBits - 1))
return 0;
-
+
// Revisit the shift (to delete it).
Worklist.Add(Shr);
-
+
Constant *DivCst =
ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
-
+
Value *Tmp =
Shr->getOpcode() == Instruction::AShr ?
Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
-
+
ICI.setOperand(0, Tmp);
-
+
// If the builder folded the binop, just return it.
BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
if (TheDiv == 0)
return &ICI;
-
+
// Otherwise, fold this div/compare.
assert(TheDiv->getOpcode() == Instruction::SDiv ||
TheDiv->getOpcode() == Instruction::UDiv);
-
+
Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
assert(Res && "This div/cst should have folded!");
return Res;
}
-
-
+
+
// If we are comparing against bits always shifted out, the
// comparison cannot succeed.
APInt Comp = CmpRHSV << ShAmtVal;
@@ -959,25 +960,25 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
Comp = Comp.lshr(ShAmtVal);
else
Comp = Comp.ashr(ShAmtVal);
-
+
if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
-
+
// Otherwise, check to see if the bits shifted out are known to be zero.
// If so, we can compare against the unshifted value:
// (X & 4) >> 1 == 2 --> (X & 4) == 4.
if (Shr->hasOneUse() && Shr->isExact())
return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
-
+
if (Shr->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
-
+
Value *And = Builder->CreateAnd(Shr->getOperand(0),
Mask, Shr->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
@@ -992,7 +993,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Instruction *LHSI,
ConstantInt *RHS) {
const APInt &RHSV = RHS->getValue();
-
+
switch (LHSI->getOpcode()) {
case Instruction::Trunc:
if (ICI.isEquality() && LHSI->hasOneUse()) {
@@ -1003,7 +1004,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
-
+
// If all the high bits are known, we can do this xform.
if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
// Pull in the high bits from known-ones set.
@@ -1014,7 +1015,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
break;
-
+
case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
// If this is a comparison that tests the signbit (X < 0) or (x > -1),
@@ -1022,7 +1023,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
(ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
Value *CompareVal = LHSI->getOperand(0);
-
+
// If the sign bit of the XorCST is not set, there is no change to
// the operation, just stop using the Xor.
if (!XorCST->isNegative()) {
@@ -1030,13 +1031,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Worklist.Add(LHSI);
return &ICI;
}
-
+
// Was the old condition true if the operand is positive?
bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
-
+
// If so, the new one isn't.
isTrueIfPositive ^= true;
-
+
if (isTrueIfPositive)
return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
SubOne(RHS));
@@ -1075,13 +1076,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
LHSI->getOperand(0)->hasOneUse()) {
ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
-
+
// If the LHS is an AND of a truncating cast, we can widen the
// and/compare to be the input width without changing the value
// produced, eliminating a cast.
if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
// We can do this transformation if either the AND constant does not
- // have its sign bit set or if it is an equality comparison.
+ // have its sign bit set or if it is an equality comparison.
// Extending a relational comparison when we're checking the sign
// bit would not work.
if (ICI.isEquality() ||
@@ -1118,12 +1119,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
if (Shift && !Shift->isShift())
Shift = 0;
-
+
ConstantInt *ShAmt;
ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
Type *AndTy = AndCST->getType(); // Type of the and.
-
+
// We can fold this as long as we can't shift unknown bits
// into the mask. This can only happen with signed shift
// rights, as they sign-extend.
@@ -1134,20 +1135,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// of the bits shifted in could be tested after the mask.
uint32_t TyBits = Ty->getPrimitiveSizeInBits();
int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
-
+
uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
- if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
+ if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
AndCST->getValue()) == 0)
CanFold = true;
}
-
+
if (CanFold) {
Constant *NewCst;
if (Shift->getOpcode() == Instruction::Shl)
NewCst = ConstantExpr::getLShr(RHS, ShAmt);
else
NewCst = ConstantExpr::getShl(RHS, ShAmt);
-
+
// Check to see if we are shifting out any of the bits being
// compared.
if (ConstantExpr::get(Shift->getOpcode(),
@@ -1175,7 +1176,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
}
-
+
// Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is
// preferable because it allows the C<<Y expression to be hoisted out
// of a loop if Y is invariant and X is not.
@@ -1185,21 +1186,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// Compute C << Y.
Value *NS;
if (Shift->getOpcode() == Instruction::LShr) {
- NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
+ NS = Builder->CreateShl(AndCST, Shift->getOperand(1));
} else {
// Insert a logical shift.
- NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
+ NS = Builder->CreateLShr(AndCST, Shift->getOperand(1));
}
-
+
// Compute X & (C << Y).
- Value *NewAnd =
+ Value *NewAnd =
Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
-
+
ICI.setOperand(0, NewAnd);
return &ICI;
}
}
-
+
// Try to optimize things like "A[i]&42 == 0" to index computations.
if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
if (GetElementPtrInst *GEP =
@@ -1234,19 +1235,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
break;
}
-
+
case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
if (!ShAmt) break;
-
+
uint32_t TypeBits = RHSV.getBitWidth();
-
+
// Check that the shift amount is in range. If not, don't perform
// undefined shifts. When the shift is visited it will be
// simplified.
if (ShAmt->uge(TypeBits))
break;
-
+
if (ICI.isEquality()) {
// If we are comparing against bits always shifted out, the
// comparison cannot succeed.
@@ -1259,34 +1260,34 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE);
return ReplaceInstUsesWith(ICI, Cst);
}
-
+
// If the shift is NUW, then it is just shifting out zeros, no need for an
// AND.
if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
ConstantExpr::getLShr(RHS, ShAmt));
-
+
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
Constant *Mask =
- ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
+ ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits,
TypeBits-ShAmtVal));
-
+
Value *And =
Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And,
ConstantExpr::getLShr(RHS, ShAmt));
}
}
-
+
// Otherwise, if this is a comparison of the sign bit, simplify to and/test.
bool TrueIfSigned = false;
if (LHSI->hasOneUse() &&
isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
// (X << 31) <s 0 --> (X&1) != 0
Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
- APInt::getOneBitSet(TypeBits,
+ APInt::getOneBitSet(TypeBits,
TypeBits-ShAmt->getZExtValue()-1));
Value *And =
Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
@@ -1295,7 +1296,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
break;
}
-
+
case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
case Instruction::AShr: {
// Handle equality comparisons of shift-by-constant.
@@ -1312,13 +1313,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
break;
}
-
+
case Instruction::SDiv:
case Instruction::UDiv:
// Fold: icmp pred ([us]div X, C1), C2 -> range test
- // Fold this div into the comparison, producing a range check.
- // Determine, based on the divide type, what the range is being
- // checked. If there is an overflow on the low or high side, remember
+ // Fold this div into the comparison, producing a range check.
+ // Determine, based on the divide type, what the range is being
+ // checked. If there is an overflow on the low or high side, remember
// it, otherwise compute the range [low, hi) bounding the new value.
// See: InsertRangeTest above for the kinds of replacements possible.
if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
@@ -1357,12 +1358,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
break;
}
-
+
// Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
if (ICI.isEquality()) {
bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-
- // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
+
+ // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
// the second operand is a constant, simplify a bit.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
switch (BO->getOpcode()) {
@@ -1389,7 +1390,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// Replace ((add A, B) != 0) with (A != -B) if A or B is
// efficiently invertible, or if the add has just this one use.
Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
-
+
if (Value *NegVal = dyn_castNegVal(BOp1))
return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
if (Value *NegVal = dyn_castNegVal(BOp0))
@@ -1432,11 +1433,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Constant *NotCI = ConstantExpr::getNot(RHS);
if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
return ReplaceInstUsesWith(ICI,
- ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
isICMP_NE));
}
break;
-
+
case Instruction::And:
if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
// If bits are being compared against that are and'd out, then the
@@ -1445,7 +1446,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return ReplaceInstUsesWith(ICI,
ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
isICMP_NE));
-
+
// If we have ((X & C) == C), turn it into ((X & C) != 0).
if (RHS == BOC && RHSV.isPowerOf2())
return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
@@ -1460,16 +1461,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (BOC->getValue().isSignBit()) {
Value *X = BO->getOperand(0);
Constant *Zero = Constant::getNullValue(X->getType());
- ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::Predicate pred = isICMP_NE ?
ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
return new ICmpInst(pred, X, Zero);
}
-
+
// ((X & ~7) == 0) --> X < 8
if (RHSV == 0 && isHighOnes(BOC)) {
Value *X = BO->getOperand(0);
Constant *NegX = ConstantExpr::getNeg(BOC);
- ICmpInst::Predicate pred = isICMP_NE ?
+ ICmpInst::Predicate pred = isICMP_NE ?
ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
return new ICmpInst(pred, X, NegX);
}
@@ -1521,7 +1522,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
Type *DestTy = LHSCI->getType();
Value *RHSCIOp;
- // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
+ // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
TD->getPointerSizeInBits() ==
@@ -1539,7 +1540,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
if (RHSOp)
return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
}
-
+
// The code below only handles extension cast instructions, so far.
// Enforce this.
if (LHSCI->getOpcode() != Instruction::ZExt &&
@@ -1552,9 +1553,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
// Not an extension from the same type?
RHSCIOp = CI->getOperand(0);
- if (RHSCIOp->getType() != LHSCIOp->getType())
+ if (RHSCIOp->getType() != LHSCIOp->getType())
return 0;
-
+
// If the signedness of the two casts doesn't agree (i.e. one is a sext
// and the other is a zext), then we can't handle this.
if (CI->getOpcode() != LHSCI->getOpcode())
@@ -1599,7 +1600,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1);
}
- // The re-extended constant changed so the constant cannot be represented
+ // The re-extended constant changed so the constant cannot be represented
// in the shorter type. Consequently, we cannot emit a simple comparison.
// All the cases that fold to true or false will have already been handled
// by SimplifyICmpInst, so only deal with the tricky case.
@@ -1637,26 +1638,26 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// llvm.sadd.with.overflow. To do this, we have to replace the original add
// with a narrower add, and discard the add-with-constant that is part of the
// range check (if we can't eliminate it, this isn't profitable).
-
+
// In order to eliminate the add-with-constant, the compare can be its only
// use.
Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
if (!AddWithCst->hasOneUse()) return 0;
-
+
// If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
if (!CI2->getValue().isPowerOf2()) return 0;
unsigned NewWidth = CI2->getValue().countTrailingZeros();
if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
-
+
// The width of the new add formed is 1 more than the bias.
++NewWidth;
-
+
// Check to see that CI1 is an all-ones value with NewWidth bits.
if (CI1->getBitWidth() == NewWidth ||
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
return 0;
-
- // In order to replace the original add with a narrower
+
+ // In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
// and truncates that discard the high bits of the add. Verify that this is
// the case.
@@ -1664,7 +1665,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
UI != E; ++UI) {
if (*UI == AddWithCst) continue;
-
+
// Only accept truncates for now. We would really like a nice recursive
// predicate like SimplifyDemandedBits, but which goes downwards the use-def
// chain to see which bits of a value are actually demanded. If the
@@ -1674,32 +1675,32 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
if (TI == 0 ||
TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
}
-
+
// If the pattern matches, truncate the inputs to the narrower type and
// use the sadd_with_overflow intrinsic to efficiently compute both the
// result and the overflow bit.
Module *M = I.getParent()->getParent()->getParent();
-
+
Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
NewType);
InstCombiner::BuilderTy *Builder = IC.Builder;
-
+
// Put the new code above the original add, in case there are any uses of the
// add between the add and the compare.
Builder->SetInsertPoint(OrigAdd);
-
+
Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
-
+
// The inner add was the result of the narrow add, zero extended to the
// wider type. Replace it with the result computed by the intrinsic.
IC.ReplaceInstUsesWith(*OrigAdd, ZExt);
-
+
// The original icmp gets replaced with the overflow value.
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
@@ -1709,13 +1710,13 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
// Don't bother doing this transformation for pointers, don't do it for
// vectors.
if (!isa<IntegerType>(OrigAddV->getType())) return 0;
-
+
// If the add is a constant expr, then we don't bother transforming it.
Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
if (OrigAdd == 0) return 0;
-
+
Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
-
+
// Put the new code above the original add, in case there are any uses of the
// add between the add and the compare.
InstCombiner::BuilderTy *Builder = IC.Builder;
@@ -1740,13 +1741,13 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
unsigned BitWidth, bool isSignCheck) {
if (isSignCheck)
return APInt::getSignBit(BitWidth);
-
+
ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
if (!CI) return APInt::getAllOnesValue(BitWidth);
const APInt &RHS = CI->getValue();
-
+
switch (I.getPredicate()) {
- // For a UGT comparison, we don't care about any bits that
+ // For a UGT comparison, we don't care about any bits that
// correspond to the trailing ones of the comparand. The value of these
// bits doesn't impact the outcome of the comparison, because any value
// greater than the RHS must differ in a bit higher than these due to carry.
@@ -1755,7 +1756,7 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
return ~lowBitsSet;
}
-
+
// Similarly, for a ULT comparison, we don't care about the trailing zeros.
// Any value less than the RHS must differ in a higher bit because of carries.
case ICmpInst::ICMP_ULT: {
@@ -1763,17 +1764,17 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
return ~lowBitsSet;
}
-
+
default:
return APInt::getAllOnesValue(BitWidth);
}
-
+
}
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
+
/// Orders the operands of the compare so that they are listed from most
/// complex to least complex. This puts constants before unary operators,
/// before binary operators.
@@ -1782,10 +1783,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
std::swap(Op0, Op1);
Changed = true;
}
-
+
if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
-
+
Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
@@ -1835,13 +1836,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
BitWidth = Ty->getScalarSizeInBits();
else if (TD) // Pointers require TD info to get their size.
BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
-
+
bool isSignBit = false;
// See if we are doing a comparison with a constant.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
Value *A = 0, *B = 0;
-
+
// Match the following pattern, which is a common idiom when writing
// overflow-safe integer arithmetic functions. The source performs an
// addition in wider type, and explicitly checks for overflow using
@@ -1849,9 +1850,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// sadd_with_overflow intrinsic.
//
// TODO: This could probably be generalized to handle other overflow-safe
- // operations if we worked out the formulas to compute the appropriate
+ // operations if we worked out the formulas to compute the appropriate
// magic constants.
- //
+ //
// sum = a + b
// if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
{
@@ -1861,14 +1862,14 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
return Res;
}
-
+
// (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
if (I.isEquality() && CI->isZero() &&
match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
// (icmp cond A B) if cond is equality
return new ICmpInst(I.getPredicate(), A, B);
}
-
+
// If we have an icmp le or icmp ge instruction, turn it into the
// appropriate icmp lt or icmp gt instruction. This allows us to rely on
// them being folded in the code below. The SimplifyICmpInst code has
@@ -1892,7 +1893,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
ConstantInt::get(CI->getContext(), CI->getValue()-1));
}
-
+
// If this comparison is a normal comparison, it demands all
// bits; if it is a sign bit comparison, it only demands the sign bit.
bool UnusedBit;
@@ -1948,7 +1949,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_EQ: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
-
+
// If all bits are known zero except for one, then we know at most one
// bit is set. If the comparison is against zero, then this is a check
// to see if *that* bit is set.
@@ -1960,7 +1961,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
LHSC->getValue() != Op0KnownZeroInverted)
LHS = Op0;
-
+
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) == 0" into "x != 3".
Value *X = 0;
@@ -1969,7 +1970,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(ICmpInst::ICMP_NE, X,
ConstantInt::get(X->getType(), CmpVal));
}
-
+
// If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
// then turn "((8 >>u x)&1) == 0" into "x != 3".
const APInt *CI;
@@ -1979,13 +1980,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt::get(X->getType(),
CI->countTrailingZeros()));
}
-
+
break;
}
case ICmpInst::ICMP_NE: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
-
+
// If all bits are known zero except for one, then we know at most one
// bit is set. If the comparison is against zero, then this is a check
// to see if *that* bit is set.
@@ -1997,7 +1998,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
LHSC->getValue() != Op0KnownZeroInverted)
LHS = Op0;
-
+
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) != 0" into "x == 3".
Value *X = 0;
@@ -2006,7 +2007,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(ICmpInst::ICMP_EQ, X,
ConstantInt::get(X->getType(), CmpVal));
}
-
+
// If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
// then turn "((8 >>u x)&1) != 0" into "x == 3".
const APInt *CI;
@@ -2016,7 +2017,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
ConstantInt::get(X->getType(),
CI->countTrailingZeros()));
}
-
+
break;
}
case ICmpInst::ICMP_ULT:
@@ -2137,9 +2138,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// See if we are doing a comparison between a constant and an instruction that
// can be folded into the comparison.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- // Since the RHS is a ConstantInt (CI), if the left hand side is an
- // instruction, see if that instruction also has constants so that the
- // instruction can be folded into the icmp
+ // Since the RHS is a ConstantInt (CI), if the left hand side is an
+ // instruction, see if that instruction also has constants so that the
+ // instruction can be folded into the icmp
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
return Res;
@@ -2194,7 +2195,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() && TD &&
- TD->getIntPtrType(RHSC->getContext()) ==
+ TD->getIntPtrType(RHSC->getContext()) ==
LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
@@ -2227,8 +2228,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// values. If the ptr->ptr cast can be stripped off both arguments, we do so
// now.
if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
- if (Op0->getType()->isPointerTy() &&
- (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ if (Op0->getType()->isPointerTy() &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
// We keep moving the cast from the left operand over to the right
// operand, where it can often be eliminated completely.
Op0 = CI->getOperand(0);
@@ -2250,7 +2251,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(I.getPredicate(), Op0, Op1);
}
}
-
+
if (isa<CastInst>(Op0)) {
// Handle the special case of: icmp (cast bool to X), <cst>
// This comes up when you have code like
@@ -2384,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(Pred, BO0->getOperand(0),
BO1->getOperand(0));
}
-
+
if (CI->isMaxValue(true)) {
ICmpInst::Predicate Pred = I.isSigned()
? I.getUnsignedPredicate()
@@ -2404,7 +2405,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Mask = -1 >> count-trailing-zeros(Cst).
if (!CI->isZero() && !CI->isOne()) {
const APInt &AP = CI->getValue();
- ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
APInt::getLowBitsSet(AP.getBitWidth(),
AP.getBitWidth() -
AP.countTrailingZeros()));
@@ -2438,7 +2439,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
}
-
+
{ Value *A, *B;
// ~x < ~y --> y < x
// ~x < cst --> ~cst < x
@@ -2452,11 +2453,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// (a+b) <u a --> llvm.uadd.with.overflow.
// (a+b) <u b --> llvm.uadd.with.overflow.
if (I.getPredicate() == ICmpInst::ICMP_ULT &&
- match(Op0, m_Add(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Add(m_Value(A), m_Value(B))) &&
(Op1 == A || Op1 == B))
if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
return R;
-
+
// a >u (a+b) --> llvm.uadd.with.overflow.
// b >u (a+b) --> llvm.uadd.with.overflow.
if (I.getPredicate() == ICmpInst::ICMP_UGT &&
@@ -2465,7 +2466,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
return R;
}
-
+
if (I.isEquality()) {
Value *A, *B, *C, *D;
@@ -2483,10 +2484,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
Constant *NC = ConstantInt::get(I.getContext(),
C1->getValue() ^ C2->getValue());
- Value *Xor = Builder->CreateXor(C, NC, "tmp");
+ Value *Xor = Builder->CreateXor(C, NC);
return new ICmpInst(I.getPredicate(), A, Xor);
}
-
+
// A^B == A^D -> B == D
if (A == C) return new ICmpInst(I.getPredicate(), B, D);
if (A == D) return new ICmpInst(I.getPredicate(), B, C);
@@ -2494,7 +2495,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (B == D) return new ICmpInst(I.getPredicate(), A, C);
}
}
-
+
if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
(A == Op0 || B == Op0)) {
// A == (A^B) -> B == 0
@@ -2504,10 +2505,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
- if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
+ if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
Value *X = 0, *Y = 0, *Z = 0;
-
+
if (A == C) {
X = B; Y = D; Z = A;
} else if (A == D) {
@@ -2517,16 +2518,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
} else if (B == D) {
X = A; Y = C; Z = B;
}
-
+
if (X) { // Build (X^Y) & Z
- Op1 = Builder->CreateXor(X, Y, "tmp");
- Op1 = Builder->CreateAnd(Op1, Z, "tmp");
+ Op1 = Builder->CreateXor(X, Y);
+ Op1 = Builder->CreateAnd(Op1, Z);
I.setOperand(0, Op1);
I.setOperand(1, Constant::getNullValue(Op1->getType()));
return &I;
}
}
-
+
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
@@ -2539,21 +2540,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// when it exposes other optimizations.
!A->hasOneUse()) {
unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
-
+
if (ShAmt < ASize) {
APInt MaskV =
APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
MaskV <<= ShAmt;
-
+
APInt CmpV = Cst1->getValue().zext(ASize);
CmpV <<= ShAmt;
-
+
Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
}
}
}
-
+
{
Value *X; ConstantInt *Cst;
// icmp X+Cst, X
@@ -2579,31 +2580,31 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Constant *RHSC) {
if (!isa<ConstantFP>(RHSC)) return 0;
const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
-
+
// Get the width of the mantissa. We don't want to hack on conversions that
// might lose information from the integer, e.g. "i64 -> float"
int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
if (MantissaWidth == -1) return 0; // Unknown.
-
+
// Check to see that the input is converted from an integer type that is small
// enough that all of its bits are preserved. TODO: check here for "known" sign
// bits. This would allow us to handle (sitofp (x >>s 62) to float) if x is
// i64, for example.
unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
-
+
// If this is a uitofp instruction, we need an extra bit to hold the sign.
bool LHSUnsigned = isa<UIToFPInst>(LHSI);
if (LHSUnsigned)
++InputSize;
-
+
// If the conversion would lose info, don't hack on this.
if ((int)InputSize > MantissaWidth)
return 0;
-
+
// Otherwise, we can potentially simplify the comparison. We know that it
// will always come through as an integer value and we know the constant is
// not a NAN (it would have been previously simplified).
assert(!RHS.isNaN() && "NaN comparison not already folded!");
-
+
ICmpInst::Predicate Pred;
switch (I.getPredicate()) {
default: llvm_unreachable("Unexpected predicate!");
@@ -2636,15 +2637,15 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
case FCmpInst::FCMP_UNO:
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
}
-
+
IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
-
+
// Now we know that the APFloat is a normal number, zero or inf.
-
+
// See if the FP constant is too large for the integer. For example,
// comparing an i8 to 300.0.
unsigned IntWidth = IntTy->getScalarSizeInBits();
-
+
if (!LHSUnsigned) {
// If the RHS value is > SignedMax, fold the comparison. This handles +INF
// and large values.
@@ -2670,7 +2671,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
}
}
-
+
if (!LHSUnsigned) {
// See if the RHS value is < SignedMin.
APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
@@ -2766,7 +2767,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
bool Changed = false;
-
+
/// Orders the operands of the compare so that they are listed from most
/// complex to least complex. This puts constants before unary operators,
/// before binary operators.
@@ -2776,7 +2777,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
+
if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
@@ -2792,7 +2793,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
I.setPredicate(FCmpInst::FCMP_UNO);
I.setOperand(1, Constant::getNullValue(Op0->getType()));
return &I;
-
+
case FCmpInst::FCMP_ORD: // True if ordered (no nans)
case FCmpInst::FCMP_OEQ: // True if ordered and equal
case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal
@@ -2803,7 +2804,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return &I;
}
}
-
+
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
@@ -2836,10 +2837,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
APFloat F = RHSF->getValueAPF();
F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy);
- // Avoid lossy conversions and denormals.
+ // Avoid lossy conversions and denormals. Zero is a special case
+ // that's OK to convert.
+ APFloat Fabs = F;
+ Fabs.clearSign();
if (!Lossy &&
- F.compare(APFloat::getSmallestNormalized(*Sem)) !=
- APFloat::cmpLessThan)
+ ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) !=
+ APFloat::cmpLessThan) || Fabs.isZero()))
+
return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
ConstantFP::get(RHSC->getContext(), F));
break;
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index bdd2edb991..7446a51a4d 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -58,8 +58,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
Idx[0] = NullIdx;
Idx[1] = NullIdx;
Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
- New->getName()+".sub");
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
InsertNewInstBefore(GEP, *It);
// Now make everything use the getelementptr instead of the original
@@ -113,7 +112,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
Value *Idxs[2];
Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
Idxs[1] = Idxs[0];
- CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
+ CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
SrcTy = cast<PointerType>(CastOp->getType());
SrcPTy = SrcTy->getElementType();
}
@@ -133,6 +132,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
LoadInst *NewLoad =
IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
NewLoad->setAlignment(LI.getAlignment());
+ NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
// Now cast the result of the load.
return new BitCastInst(NewLoad, LI.getType());
}
@@ -163,8 +163,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
- // None of the following transforms are legal for volatile loads.
- if (LI.isVolatile()) return 0;
+ // None of the following transforms are legal for volatile/atomic loads.
+ // FIXME: Some of it is okay for atomic loads; needs refactoring.
+ if (!LI.isSimple()) return 0;
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
@@ -327,8 +328,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
if (!NewGEPIndices.empty())
- CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
- NewGEPIndices.end());
+ CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
@@ -370,21 +370,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
Value *Val = SI.getOperand(0);
Value *Ptr = SI.getOperand(1);
- // If the RHS is an alloca with a single use, zapify the store, making the
- // alloca dead.
- if (!SI.isVolatile()) {
- if (Ptr->hasOneUse()) {
- if (isa<AllocaInst>(Ptr))
- return EraseInstFromFunction(SI);
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
- if (isa<AllocaInst>(GEP->getOperand(0))) {
- if (GEP->getOperand(0)->hasOneUse())
- return EraseInstFromFunction(SI);
- }
- }
- }
- }
-
// Attempt to improve the alignment.
if (TD) {
unsigned KnownAlign =
@@ -400,6 +385,23 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
SI.setAlignment(EffectiveStoreAlign);
}
+ // Don't hack volatile/atomic stores.
+ // FIXME: Some bits are legal for atomic stores; needs refactoring.
+ if (!SI.isSimple()) return 0;
+
+ // If the RHS is an alloca with a single use, zapify the store, making the
+ // alloca dead.
+ if (Ptr->hasOneUse()) {
+ if (isa<AllocaInst>(Ptr))
+ return EraseInstFromFunction(SI);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (isa<AllocaInst>(GEP->getOperand(0))) {
+ if (GEP->getOperand(0)->hasOneUse())
+ return EraseInstFromFunction(SI);
+ }
+ }
+ }
+
// Do really simple DSE, to catch cases where there are several consecutive
// stores to the same location, separated by a few arithmetic operations. This
// situation often occurs with bitfield accesses.
@@ -417,8 +419,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store is simple (not volatile or atomic), and stores to the same location?
- if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1),
- SI.getOperand(1))) {
+ if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
+ SI.getOperand(1))) {
++NumDeadStore;
++BBI;
EraseInstFromFunction(*PrevSI);
@@ -432,7 +434,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// then *this* store is dead (X = load P; store X -> P).
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
- !SI.isVolatile())
+ LI->isSimple())
return EraseInstFromFunction(SI);
// Otherwise, this is a load from some other location. Stores before it
@@ -444,9 +446,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
}
-
-
- if (SI.isVolatile()) return 0; // Don't hack volatile stores.
// store X, null -> turns into 'unreachable' in SimplifyCFG
if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
@@ -549,11 +548,11 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
--BBI;
}
- // If this isn't a store, isn't a store to the same location, or if the
- // alignments differ, bail out.
+ // If this isn't a store, isn't a store to the same location, or is not the
+ // right kind of store, bail out.
OtherStore = dyn_cast<StoreInst>(BBI);
if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
- OtherStore->getAlignment() != SI.getAlignment())
+ !SI.isSameOperationAs(OtherStore))
return false;
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
@@ -569,7 +568,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// Check to see if we find the matching store.
if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
if (OtherStore->getOperand(1) != SI.getOperand(1) ||
- OtherStore->getAlignment() != SI.getAlignment())
+ !SI.isSameOperationAs(OtherStore))
return false;
break;
}
@@ -601,10 +600,12 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// Advance to a place where it is safe to insert the new store and
// insert it.
- BBI = DestBB->getFirstNonPHI();
+ BBI = DestBB->getFirstInsertionPt();
StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
- OtherStore->isVolatile(),
- SI.getAlignment());
+ SI.isVolatile(),
+ SI.getAlignment(),
+ SI.getOrdering(),
+ SI.getSynchScope());
InsertNewInstBefore(NewSI, *BBI);
NewSI->setDebugLoc(OtherStore->getDebugLoc());
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 53341ccbfc..7f48125a97 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -38,7 +38,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
m_Value(B))) &&
// The "1" can be any value known to be a power of 2.
isPowerOfTwo(PowerOf2, IC.getTargetData())) {
- A = IC.Builder->CreateSub(A, B, "tmp");
+ A = IC.Builder->CreateSub(A, B);
return IC.Builder->CreateShl(PowerOf2, A);
}
@@ -131,7 +131,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
{ Value *X; ConstantInt *C1;
if (Op0->hasOneUse() &&
match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
- Value *Add = Builder->CreateMul(X, CI, "tmp");
+ Value *Add = Builder->CreateMul(X, CI);
return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
}
}
@@ -244,7 +244,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (BoolCast) {
Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
- BoolCast, "tmp");
+ BoolCast);
return BinaryOperator::CreateAnd(V, OtherOp);
}
}
@@ -466,8 +466,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
{ const APInt *CI; Value *N;
if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) {
if (*CI != 1)
- N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()),
- "tmp");
+ N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2()));
if (I.isExact())
return BinaryOperator::CreateExactLShr(Op0, N);
return BinaryOperator::CreateLShr(Op0, N);
@@ -630,7 +629,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
// Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
- Value *Add = Builder->CreateAdd(Op1, N1, "tmp");
+ Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
}
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index bf1049d152..664546c165 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -229,8 +229,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
Value *Base = FixedOperands[0];
GetElementPtrInst *NewGEP =
- GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
- FixedOperands.end());
+ GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
return NewGEP;
@@ -287,7 +286,12 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
-
+
+ // FIXME: This is overconservative; this transform is allowed in some cases
+ // for atomic operations.
+ if (FirstLI->isAtomic())
+ return 0;
+
// When processing loads, we need to propagate two bits of information to the
// sunk load: whether it is volatile, and what its alignment is. We currently
// don't sink loads when some have their alignment specified and some don't.
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index bd7f40d8ac..91e60a4fb2 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
using namespace llvm;
using namespace PatternMatch;
@@ -323,9 +324,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
}
// All operands were constants, fold it.
- if (ConstOps.size() == I->getNumOperands())
+ if (ConstOps.size() == I->getNumOperands()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
+
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
ConstOps, TD);
+ }
}
return 0;
@@ -476,10 +482,16 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ return ReplaceInstUsesWith(SI, TrueVal);
}
// NOTE: if we wanted to, this is where to detect integer MIN/MAX
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 65d1a66f71..6d85adde9b 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
@@ -207,11 +208,12 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
return I;
case Instruction::Shl: {
- unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
// We only accept shifts-by-a-constant in CanEvaluateShifted.
- ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
-
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+
// We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
if (isLeftShift) {
// If this is an oversized composite shift, then unsigned shifts get 0.
@@ -219,7 +221,9 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
if (NewShAmt >= TypeWidth)
return Constant::getNullValue(I->getType());
- I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
+ BO->setHasNoUnsignedWrap(false);
+ BO->setHasNoSignedWrap(false);
return I;
}
@@ -227,11 +231,11 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// zeros.
if (CI->getValue() == NumBits) {
APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
- V = IC.Builder->CreateAnd(I->getOperand(0),
- ConstantInt::get(I->getContext(), Mask));
+ V = IC.Builder->CreateAnd(BO->getOperand(0),
+ ConstantInt::get(BO->getContext(), Mask));
if (Instruction *VI = dyn_cast<Instruction>(V)) {
- VI->moveBefore(I);
- VI->takeName(I);
+ VI->moveBefore(BO);
+ VI->takeName(BO);
}
return V;
}
@@ -239,23 +243,27 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
// the and won't be needed.
assert(CI->getZExtValue() > NumBits);
- I->setOperand(1, ConstantInt::get(I->getType(),
- CI->getZExtValue() - NumBits));
- return I;
+ BO->setOperand(1, ConstantInt::get(BO->getType(),
+ CI->getZExtValue() - NumBits));
+ BO->setHasNoUnsignedWrap(false);
+ BO->setHasNoSignedWrap(false);
+ return BO;
}
case Instruction::LShr: {
- unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ BinaryOperator *BO = cast<BinaryOperator>(I);
+ unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
// We only accept shifts-by-a-constant in CanEvaluateShifted.
- ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
// We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
if (!isLeftShift) {
// If this is an oversized composite shift, then unsigned shifts get 0.
unsigned NewShAmt = NumBits+CI->getZExtValue();
if (NewShAmt >= TypeWidth)
- return Constant::getNullValue(I->getType());
+ return Constant::getNullValue(BO->getType());
- I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
+ BO->setIsExact(false);
return I;
}
@@ -264,7 +272,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
if (CI->getValue() == NumBits) {
APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
V = IC.Builder->CreateAnd(I->getOperand(0),
- ConstantInt::get(I->getContext(), Mask));
+ ConstantInt::get(BO->getContext(), Mask));
if (Instruction *VI = dyn_cast<Instruction>(V)) {
VI->moveBefore(I);
VI->takeName(I);
@@ -275,9 +283,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
// the and won't be needed.
assert(CI->getZExtValue() > NumBits);
- I->setOperand(1, ConstantInt::get(I->getType(),
- CI->getZExtValue() - NumBits));
- return I;
+ BO->setOperand(1, ConstantInt::get(BO->getType(),
+ CI->getZExtValue() - NumBits));
+ BO->setIsExact(false);
+ return BO;
}
case Instruction::Select:
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 66f39be17b..5cd9a4b795 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -325,8 +325,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
Constant *AndC = Constant::getIntegerValue(VTy,
~RHSKnownOne & DemandedMask);
- Instruction *And =
- BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
return InsertNewInstWith(And, *I);
}
}
@@ -351,14 +350,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Constant *AndC =
ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
- Instruction *NewAnd =
- BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
+ Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
InsertNewInstWith(NewAnd, *I);
Constant *XorC =
ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
- Instruction *NewXor =
- BinaryOperator::CreateXor(NewAnd, XorC, "tmp");
+ Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
return InsertNewInstWith(NewXor, *I);
}
@@ -962,6 +959,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal == -1u) {
UndefElts.setBit(i);
+ } else if (!DemandedElts[i]) {
+ NewUndefElts = true;
+ UndefElts.setBit(i);
} else if (MaskVal < LHSVWidth) {
if (UndefElts4[MaskVal]) {
NewUndefElts = true;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 021ca13257..288fe68097 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -46,8 +46,10 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm-c/Initialization.h"
#include <algorithm>
#include <climits>
@@ -107,6 +109,43 @@ bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
return true;
}
+// Return true, if No Signed Wrap should be maintained for I.
+// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
+// where both B and C should be ConstantInts, results in a constant that does
+// not overflow. This function only handles the Add and Sub opcodes. For
+// all other opcodes, the function conservatively returns false.
+static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
+ OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+ if (!OBO || !OBO->hasNoSignedWrap()) {
+ return false;
+ }
+
+ // We reason about Add and Sub only.
+ Instruction::BinaryOps Opcode = I.getOpcode();
+ if (Opcode != Instruction::Add &&
+ Opcode != Instruction::Sub) {
+ return false;
+ }
+
+ ConstantInt *CB = dyn_cast<ConstantInt>(B);
+ ConstantInt *CC = dyn_cast<ConstantInt>(C);
+
+ if (!CB || !CC) {
+ return false;
+ }
+
+ const APInt &BVal = CB->getValue();
+ const APInt &CVal = CC->getValue();
+ bool Overflow = false;
+
+ if (Opcode == Instruction::Add) {
+ BVal.sadd_ov(CVal, Overflow);
+ } else {
+ BVal.ssub_ov(CVal, Overflow);
+ }
+
+ return !Overflow;
+}
/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
/// operators which are associative or commutative:
@@ -158,7 +197,16 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, V);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ if (MaintainNoSignedWrap(I, B, C) &&
+ (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
+ // Note: this is only valid because SimplifyBinOp doesn't look at
+ // the operands to Op0.
+ I.clearSubclassOptionalData();
+ I.setHasNoSignedWrap(true);
+ } else {
+ I.clearSubclassOptionalData();
+ }
+
Changed = true;
++NumReassoc;
continue;
@@ -240,7 +288,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Constant *C2 = cast<Constant>(Op1->getOperand(1));
Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
- Instruction *New = BinaryOperator::Create(Opcode, A, B);
+ BinaryOperator *New = BinaryOperator::Create(Opcode, A, B);
InsertNewInstWith(New, I);
New->takeName(Op1);
I.setOperand(0, New);
@@ -248,6 +296,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
I.clearSubclassOptionalData();
+
Changed = true;
continue;
}
@@ -737,7 +786,15 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
return Ty;
}
-
+static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
+ // If this GEP has only 0 indices, it is the same pointer as
+ // Src. If Src is not a trivial GEP too, don't combine
+ // the indices.
+ if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
+ !Src.hasOneUse())
+ return false;
+ return true;
+}
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
@@ -785,21 +842,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr instructions into a single instruction.
//
if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
-
- // If this GEP has only 0 indices, it is the same pointer as
- // Src. If Src is not a trivial GEP too, don't combine
- // the indices.
- if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
- !Src->hasOneUse())
+ if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
return 0;
// Note that if our source is a gep chain itself that we wait for that
// chain to be resolved before we perform this transformation. This
// avoids us creating a TON of code in some cases.
- //
- if (GetElementPtrInst *SrcGEP =
- dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
- if (SrcGEP->getNumOperands() == 2)
+ if (GEPOperator *SrcGEP =
+ dyn_cast<GEPOperator>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
return 0; // Wait until our source is folded to completion.
SmallVector<Value*, 8> Indices;
@@ -851,10 +902,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Indices.empty())
return (GEP.isInBounds() && Src->isInBounds()) ?
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
- Indices.end(), GEP.getName()) :
- GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
- Indices.end(), GEP.getName());
+ GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices,
+ GEP.getName()) :
+ GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
}
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
@@ -883,8 +933,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
GetElementPtrInst *Res =
- GetElementPtrInst::Create(StrippedPtr, Idx.begin(),
- Idx.end(), GEP.getName());
+ GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
return Res;
}
@@ -916,8 +965,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
Idx[1] = GEP.getOperand(1);
Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
return new BitCastInst(NewGEP, GEP.getType());
}
@@ -975,8 +1024,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
Idx[1] = NewIdx;
Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2,GEP.getName()):
- Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()):
+ Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return new BitCastInst(NewGEP, GEP.getType());
}
@@ -1027,10 +1076,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
if (FindElementAtOffset(InTy, Offset, NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
- NewIndices.end()) :
- Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
- NewIndices.end());
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
+ Builder->CreateGEP(BCI->getOperand(0), NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -1045,15 +1092,43 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
-static bool IsOnlyNullComparedAndFreed(const Value &V) {
- for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end();
+static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl<WeakVH> &Users,
+ int Depth = 0) {
+ if (Depth == 8)
+ return false;
+
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
- const User *U = *UI;
- if (isFreeCall(U))
+ User *U = *UI;
+ if (isFreeCall(U)) {
+ Users.push_back(U);
continue;
- if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U))
- if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1)))
+ }
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+ if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) {
+ Users.push_back(ICI);
+ continue;
+ }
+ }
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) {
+ Users.push_back(BCI);
+ continue;
+ }
+ }
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) {
+ Users.push_back(GEPI);
+ continue;
+ }
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ Users.push_back(II);
continue;
+ }
+ }
return false;
}
return true;
@@ -1063,25 +1138,20 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) {
// If we have a malloc call which is only used in any amount of comparisons
// to null and free calls, delete the calls and replace the comparisons with
// true or false as appropriate.
- if (IsOnlyNullComparedAndFreed(MI)) {
- for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end();
- UI != UE;) {
- // We can assume that every remaining use is a free call or an icmp eq/ne
- // to null, so the cast is safe.
- Instruction *I = cast<Instruction>(*UI);
-
- // Early increment here, as we're about to get rid of the user.
- ++UI;
-
- if (isFreeCall(I)) {
- EraseInstFromFunction(*cast<CallInst>(I));
- continue;
+ SmallVector<WeakVH, 64> Users;
+ if (IsOnlyNullComparedAndFreed(&MI, Users)) {
+ for (unsigned i = 0, e = Users.size(); i != e; ++i) {
+ Instruction *I = cast_or_null<Instruction>(&*Users[i]);
+ if (!I) continue;
+
+ if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
+ ReplaceInstUsesWith(*C,
+ ConstantInt::get(Type::getInt1Ty(C->getContext()),
+ C->isFalseWhenEqual()));
+ } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
+ ReplaceInstUsesWith(*I, UndefValue::get(I->getType()));
}
- // Again, the cast is safe.
- ICmpInst *C = cast<ICmpInst>(I);
- ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()),
- C->isFalseWhenEqual()));
- EraseInstFromFunction(*C);
+ EraseInstFromFunction(*I);
}
return EraseInstFromFunction(MI);
}
@@ -1120,8 +1190,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
!isa<Constant>(X)) {
// Swap Destinations and condition...
BI.setCondition(X);
- BI.setSuccessor(0, FalseDest);
- BI.setSuccessor(1, TrueDest);
+ BI.swapSuccessors();
return &BI;
}
@@ -1136,8 +1205,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
// Swap Destinations and condition.
- BI.setSuccessor(0, FalseDest);
- BI.setSuccessor(1, TrueDest);
+ BI.swapSuccessors();
Worklist.Add(Cond);
return &BI;
}
@@ -1153,8 +1221,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
// Swap Destinations and condition.
- BI.setSuccessor(0, FalseDest);
- BI.setSuccessor(1, TrueDest);
+ BI.swapSuccessors();
Worklist.Add(Cond);
return &BI;
}
@@ -1168,11 +1235,17 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
if (I->getOpcode() == Instruction::Add)
if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
// change 'switch (X+4) case 1:' into 'switch (X) case -3'
- for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
- SI.setOperand(i,
- ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
- AddRHS));
- SI.setOperand(0, I->getOperand(0));
+ unsigned NumCases = SI.getNumCases();
+ // Skip the first item since that's the default case.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ ConstantInt* CaseVal = SI.getCaseValue(i);
+ Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal),
+ AddRHS);
+ assert(isa<ConstantInt>(NewCaseVal) &&
+ "Result of expression should be constant");
+ SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal));
+ }
+ SI.setCondition(I->getOperand(0));
Worklist.Add(I);
return &SI;
}
@@ -1310,7 +1383,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// load from a GEP. This reduces the size of the load.
// FIXME: If a load is used only by extractvalue instructions then this
// could be done regardless of having multiple uses.
- if (!L->isVolatile() && L->hasOneUse()) {
+ if (L->isSimple() && L->hasOneUse()) {
// extractvalue has integer indices, getelementptr has Value*s. Convert.
SmallVector<Value*, 4> Indices;
// Prefix an i32 0 since we need the first element.
@@ -1322,8 +1395,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// We need to insert these at the location of the old load, not at that of
// the extractvalue.
Builder->SetInsertPoint(L->getParent(), L);
- Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(),
- Indices.begin(), Indices.end());
+ Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices);
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use ReplaceInstUsesWith().
return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
@@ -1339,6 +1411,345 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return 0;
}
+enum Personality_Type {
+ Unknown_Personality,
+ GNU_Ada_Personality,
+ GNU_CXX_Personality,
+ GNU_ObjC_Personality
+};
+
+/// RecognizePersonality - See if the given exception handling personality
+/// function is one that we understand. If so, return a description of it;
+/// otherwise return Unknown_Personality.
+static Personality_Type RecognizePersonality(Value *Pers) {
+ Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
+ if (!F)
+ return Unknown_Personality;
+ return StringSwitch<Personality_Type>(F->getName())
+ .Case("__gnat_eh_personality", GNU_Ada_Personality)
+ .Case("__gxx_personality_v0", GNU_CXX_Personality)
+ .Case("__objc_personality_v0", GNU_ObjC_Personality)
+ .Default(Unknown_Personality);
+}
+
+/// isCatchAll - Return 'true' if the given typeinfo will match anything.
+static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) {
+ switch (Personality) {
+ case Unknown_Personality:
+ return false;
+ case GNU_Ada_Personality:
+ // While __gnat_all_others_value will match any Ada exception, it doesn't
+ // match foreign exceptions (or didn't, before gcc-4.7).
+ return false;
+ case GNU_CXX_Personality:
+ case GNU_ObjC_Personality:
+ return TypeInfo->isNullValue();
+ }
+ llvm_unreachable("Unknown personality!");
+}
+
+static bool shorter_filter(const Value *LHS, const Value *RHS) {
+ return
+ cast<ArrayType>(LHS->getType())->getNumElements()
+ <
+ cast<ArrayType>(RHS->getType())->getNumElements();
+}
+
+Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
+ // The logic here should be correct for any real-world personality function.
+ // However if that turns out not to be true, the offending logic can always
+ // be conditioned on the personality function, like the catch-all logic is.
+ Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn());
+
+ // Simplify the list of clauses, eg by removing repeated catch clauses
+ // (these are often created by inlining).
+ bool MakeNewInstruction = false; // If true, recreate using the following:
+ SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction;
+ bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
+
+ SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
+ for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
+ bool isLastClause = i + 1 == e;
+ if (LI.isCatch(i)) {
+ // A catch clause.
+ Value *CatchClause = LI.getClause(i);
+ Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts());
+
+ // If we already saw this clause, there is no point in having a second
+ // copy of it.
+ if (AlreadyCaught.insert(TypeInfo)) {
+ // This catch clause was not already seen.
+ NewClauses.push_back(CatchClause);
+ } else {
+ // Repeated catch clause - drop the redundant copy.
+ MakeNewInstruction = true;
+ }
+
+ // If this is a catch-all then there is no point in keeping any following
+ // clauses or marking the landingpad as having a cleanup.
+ if (isCatchAll(Personality, TypeInfo)) {
+ if (!isLastClause)
+ MakeNewInstruction = true;
+ CleanupFlag = false;
+ break;
+ }
+ } else {
+ // A filter clause. If any of the filter elements were already caught
+ // then they can be dropped from the filter. It is tempting to try to
+ // exploit the filter further by saying that any typeinfo that does not
+ // occur in the filter can't be caught later (and thus can be dropped).
+ // However this would be wrong, since typeinfos can match without being
+ // equal (for example if one represents a C++ class, and the other some
+ // class derived from it).
+ assert(LI.isFilter(i) && "Unsupported landingpad clause!");
+ Value *FilterClause = LI.getClause(i);
+ ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
+ unsigned NumTypeInfos = FilterType->getNumElements();
+
+ // An empty filter catches everything, so there is no point in keeping any
+ // following clauses or marking the landingpad as having a cleanup. By
+ // dealing with this case here the following code is made a bit simpler.
+ if (!NumTypeInfos) {
+ NewClauses.push_back(FilterClause);
+ if (!isLastClause)
+ MakeNewInstruction = true;
+ CleanupFlag = false;
+ break;
+ }
+
+ bool MakeNewFilter = false; // If true, make a new filter.
+ SmallVector<Constant *, 16> NewFilterElts; // New elements.
+ if (isa<ConstantAggregateZero>(FilterClause)) {
+ // Not an empty filter - it contains at least one null typeinfo.
+ assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
+ Constant *TypeInfo =
+ Constant::getNullValue(FilterType->getElementType());
+ // If this typeinfo is a catch-all then the filter can never match.
+ if (isCatchAll(Personality, TypeInfo)) {
+ // Throw the filter away.
+ MakeNewInstruction = true;
+ continue;
+ }
+
+ // There is no point in having multiple copies of this typeinfo, so
+ // discard all but the first copy if there is more than one.
+ NewFilterElts.push_back(TypeInfo);
+ if (NumTypeInfos > 1)
+ MakeNewFilter = true;
+ } else {
+ ConstantArray *Filter = cast<ConstantArray>(FilterClause);
+ SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
+ NewFilterElts.reserve(NumTypeInfos);
+
+ // Remove any filter elements that were already caught or that already
+ // occurred in the filter. While there, see if any of the elements are
+ // catch-alls. If so, the filter can be discarded.
+ bool SawCatchAll = false;
+ for (unsigned j = 0; j != NumTypeInfos; ++j) {
+ Value *Elt = Filter->getOperand(j);
+ Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts());
+ if (isCatchAll(Personality, TypeInfo)) {
+ // This element is a catch-all. Bail out, noting this fact.
+ SawCatchAll = true;
+ break;
+ }
+ if (AlreadyCaught.count(TypeInfo))
+ // Already caught by an earlier clause, so having it in the filter
+ // is pointless.
+ continue;
+ // There is no point in having multiple copies of the same typeinfo in
+ // a filter, so only add it if we didn't already.
+ if (SeenInFilter.insert(TypeInfo))
+ NewFilterElts.push_back(cast<Constant>(Elt));
+ }
+ // A filter containing a catch-all cannot match anything by definition.
+ if (SawCatchAll) {
+ // Throw the filter away.
+ MakeNewInstruction = true;
+ continue;
+ }
+
+ // If we dropped something from the filter, make a new one.
+ if (NewFilterElts.size() < NumTypeInfos)
+ MakeNewFilter = true;
+ }
+ if (MakeNewFilter) {
+ FilterType = ArrayType::get(FilterType->getElementType(),
+ NewFilterElts.size());
+ FilterClause = ConstantArray::get(FilterType, NewFilterElts);
+ MakeNewInstruction = true;
+ }
+
+ NewClauses.push_back(FilterClause);
+
+ // If the new filter is empty then it will catch everything so there is
+ // no point in keeping any following clauses or marking the landingpad
+ // as having a cleanup. The case of the original filter being empty was
+ // already handled above.
+ if (MakeNewFilter && !NewFilterElts.size()) {
+ assert(MakeNewInstruction && "New filter but not a new instruction!");
+ CleanupFlag = false;
+ break;
+ }
+ }
+ }
+
+ // If several filters occur in a row then reorder them so that the shortest
+ // filters come first (those with the smallest number of elements). This is
+ // advantageous because shorter filters are more likely to match, speeding up
+ // unwinding, but mostly because it increases the effectiveness of the other
+ // filter optimizations below.
+ for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
+ unsigned j;
+ // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
+ for (j = i; j != e; ++j)
+ if (!isa<ArrayType>(NewClauses[j]->getType()))
+ break;
+
+ // Check whether the filters are already sorted by length. We need to know
+ // if sorting them is actually going to do anything so that we only make a
+ // new landingpad instruction if it does.
+ for (unsigned k = i; k + 1 < j; ++k)
+ if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
+ // Not sorted, so sort the filters now. Doing an unstable sort would be
+ // correct too but reordering filters pointlessly might confuse users.
+ std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
+ shorter_filter);
+ MakeNewInstruction = true;
+ break;
+ }
+
+ // Look for the next batch of filters.
+ i = j + 1;
+ }
+
+ // If typeinfos matched if and only if equal, then the elements of a filter L
+ // that occurs later than a filter F could be replaced by the intersection of
+ // the elements of F and L. In reality two typeinfos can match without being
+ // equal (for example if one represents a C++ class, and the other some class
+ // derived from it) so it would be wrong to perform this transform in general.
+ // However the transform is correct and useful if F is a subset of L. In that
+ // case L can be replaced by F, and thus removed altogether since repeating a
+ // filter is pointless. So here we look at all pairs of filters F and L where
+ // L follows F in the list of clauses, and remove L if every element of F is
+ // an element of L. This can occur when inlining C++ functions with exception
+ // specifications.
+ for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
+ // Examine each filter in turn.
+ Value *Filter = NewClauses[i];
+ ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
+ if (!FTy)
+ // Not a filter - skip it.
+ continue;
+ unsigned FElts = FTy->getNumElements();
+ // Examine each filter following this one. Doing this backwards means that
+ // we don't have to worry about filters disappearing under us when removed.
+ for (unsigned j = NewClauses.size() - 1; j != i; --j) {
+ Value *LFilter = NewClauses[j];
+ ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
+ if (!LTy)
+ // Not a filter - skip it.
+ continue;
+ // If Filter is a subset of LFilter, i.e. every element of Filter is also
+ // an element of LFilter, then discard LFilter.
+ SmallVector<Value *, 16>::iterator J = NewClauses.begin() + j;
+ // If Filter is empty then it is a subset of LFilter.
+ if (!FElts) {
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ // Move on to the next filter.
+ continue;
+ }
+ unsigned LElts = LTy->getNumElements();
+ // If Filter is longer than LFilter then it cannot be a subset of it.
+ if (FElts > LElts)
+ // Move on to the next filter.
+ continue;
+ // At this point we know that LFilter has at least one element.
+ if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
+ // Filter is a subset of LFilter iff Filter contains only zeros (as we
+ // already know that Filter is not longer than LFilter).
+ if (isa<ConstantAggregateZero>(Filter)) {
+ assert(FElts <= LElts && "Should have handled this case earlier!");
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ }
+ // Move on to the next filter.
+ continue;
+ }
+ ConstantArray *LArray = cast<ConstantArray>(LFilter);
+ if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
+ // Since Filter is non-empty and contains only zeros, it is a subset of
+ // LFilter iff LFilter contains a zero.
+ assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
+ for (unsigned l = 0; l != LElts; ++l)
+ if (LArray->getOperand(l)->isNullValue()) {
+ // LFilter contains a zero - discard it.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ break;
+ }
+ // Move on to the next filter.
+ continue;
+ }
+ // At this point we know that both filters are ConstantArrays. Loop over
+ // operands to see whether every element of Filter is also an element of
+ // LFilter. Since filters tend to be short this is probably faster than
+ // using a method that scales nicely.
+ ConstantArray *FArray = cast<ConstantArray>(Filter);
+ bool AllFound = true;
+ for (unsigned f = 0; f != FElts; ++f) {
+ Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
+ AllFound = false;
+ for (unsigned l = 0; l != LElts; ++l) {
+ Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
+ if (LTypeInfo == FTypeInfo) {
+ AllFound = true;
+ break;
+ }
+ }
+ if (!AllFound)
+ break;
+ }
+ if (AllFound) {
+ // Discard LFilter.
+ NewClauses.erase(J);
+ MakeNewInstruction = true;
+ }
+ // Move on to the next filter.
+ }
+ }
+
+ // If we changed any of the clauses, replace the old landingpad instruction
+ // with a new one.
+ if (MakeNewInstruction) {
+ LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
+ LI.getPersonalityFn(),
+ NewClauses.size());
+ for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
+ NLI->addClause(NewClauses[i]);
+ // A landing pad with no clauses must have the cleanup flag set. It is
+ // theoretically possible, though highly unlikely, that we eliminated all
+ // clauses. If so, force the cleanup flag to true.
+ if (NewClauses.empty())
+ CleanupFlag = true;
+ NLI->setCleanup(CleanupFlag);
+ return NLI;
+ }
+
+ // Even if none of the clauses changed, we may nonetheless have understood
+ // that the cleanup flag is pointless. Clear it if so.
+ if (LI.isCleanup() != CleanupFlag) {
+ assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
+ LI.setCleanup(CleanupFlag);
+ return &LI;
+ }
+
+ return 0;
+}
+
@@ -1350,7 +1761,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
assert(I->hasOneUse() && "Invariants didn't hold!");
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
+ if (isa<PHINode>(I) || isa<LandingPadInst>(I) || I->mayHaveSideEffects() ||
+ isa<TerminatorInst>(I))
return false;
// Do not sink alloca instructions out of the entry block.
@@ -1367,8 +1779,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
return false;
}
- BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
I->moveBefore(InsertPos);
++NumSunkInst;
return true;
@@ -1503,27 +1914,29 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
// the instcombine code from having to deal with some bad special cases.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (!Visited.count(BB)) {
- Instruction *Term = BB->getTerminator();
- while (Term != BB->begin()) { // Remove instrs bottom-up
- BasicBlock::iterator I = Term; --I;
-
- DEBUG(errs() << "IC: DCE: " << *I << '\n');
- // A debug intrinsic shouldn't force another iteration if we weren't
- // going to do one without it.
- if (!isa<DbgInfoIntrinsic>(I)) {
- ++NumDeadInst;
- MadeIRChange = true;
- }
-
- // If I is not void type then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I->getType()->isVoidTy())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- I->eraseFromParent();
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Visited.count(BB)) continue;
+
+ // Delete the instructions backwards, as it has a reduced likelihood of
+ // having to update as many def-use and use-def chains.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
}
+ if (!isa<DbgInfoIntrinsic>(Inst)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+ Inst->eraseFromParent();
}
+ }
}
while (!Worklist.isEmpty()) {
@@ -1604,13 +2017,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
// Push the new instruction and any users onto the worklist.
Worklist.Add(Result);
Worklist.AddUsersToWorkList(*Result);
- // Move the name to the new instruction first.
- Result->takeName(I);
-
// Insert the new instruction into the basic block...
BasicBlock *InstParent = I->getParent();
BasicBlock::iterator InsertPos = I;
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 5700ac87f6..7b3a927a4e 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -6,3 +6,10 @@ add_llvm_library(LLVMInstrumentation
PathProfiling.cpp
ProfilingUtils.cpp
)
+
+add_llvm_library_dependencies(LLVMInstrumentation
+ LLVMAnalysis
+ LLVMCore
+ LLVMSupport
+ LLVMTransformUtils
+ )
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index bd1b46307e..ccf7e1109c 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -60,11 +60,11 @@ namespace {
bool runOnModule(Module &M);
// Create the GCNO files for the Module based on DebugInfo.
- void emitGCNO(DebugInfoFinder &DIF);
+ void emitGCNO();
// Modify the program to track transitions along edges and call into the
// profiling runtime to emit .gcda files when run.
- bool emitProfileArcs(DebugInfoFinder &DIF);
+ bool emitProfileArcs();
// Get pointers to the functions in the runtime library.
Constant *getStartFileFunc();
@@ -86,8 +86,7 @@ namespace {
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(DebugInfoFinder &,
- SmallVector<std::pair<GlobalVariable *,
+ void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *,
MDNode *>, 8> &);
std::string mangleName(DICompileUnit CU, std::string NewStem);
@@ -110,15 +109,6 @@ ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
}
-static DISubprogram findSubprogram(DIScope Scope) {
- while (!Scope.isSubprogram()) {
- assert(Scope.isLexicalBlock() &&
- "Debug location not lexical block or subprogram");
- Scope = DILexicalBlock(Scope).getContext();
- }
- return DISubprogram(Scope);
-}
-
namespace {
class GCOVRecord {
protected:
@@ -177,18 +167,24 @@ namespace {
}
uint32_t length() {
+ // Here 2 = 1 for string length + 1 for '0' id#.
return lengthOfGCOVString(Filename) + 2 + Lines.size();
}
- private:
- friend class GCOVBlock;
+ void writeOut() {
+ write(0);
+ writeGCOVString(Filename);
+ for (int i = 0, e = Lines.size(); i != e; ++i)
+ write(Lines[i]);
+ }
- GCOVLines(std::string Filename, raw_ostream *os)
- : Filename(Filename) {
+ GCOVLines(StringRef F, raw_ostream *os)
+ : Filename(F) {
this->os = os;
}
- std::string Filename;
+ private:
+ StringRef Filename;
SmallVector<uint32_t, 32> Lines;
};
@@ -197,7 +193,7 @@ namespace {
// other blocks.
class GCOVBlock : public GCOVRecord {
public:
- GCOVLines &getFile(std::string Filename) {
+ GCOVLines &getFile(StringRef Filename) {
GCOVLines *&Lines = LinesByFile[Filename];
if (!Lines) {
Lines = new GCOVLines(Filename, os);
@@ -220,13 +216,8 @@ namespace {
write(Len);
write(Number);
for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
- E = LinesByFile.end(); I != E; ++I) {
- write(0);
- writeGCOVString(I->second->Filename);
- for (int i = 0, e = I->second->Lines.size(); i != e; ++i) {
- write(I->second->Lines[i]);
- }
- }
+ E = LinesByFile.end(); I != E; ++I)
+ I->second->writeOut();
write(0);
write(0);
}
@@ -353,66 +344,66 @@ bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
- DebugInfoFinder DIF;
- DIF.processModule(M);
-
- if (EmitNotes) emitGCNO(DIF);
- if (EmitData) return emitProfileArcs(DIF);
+ if (EmitNotes) emitGCNO();
+ if (EmitData) return emitProfileArcs();
return false;
}
-void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) {
+void GCOVProfiler::emitGCNO() {
DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
- for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(),
- E = DIF.compile_unit_end(); I != E; ++I) {
- // Each compile unit gets its own .gcno file. This means that whether we run
- // this pass over the original .o's as they're produced, or run it after
- // LTO, we'll generate the same .gcno files.
-
- DICompileUnit CU(*I);
- raw_fd_ostream *&out = GcnoFiles[CU];
- std::string ErrorInfo;
- out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out->write("oncg*404MVLL", 12);
- else
- out->write("oncg*402MVLL", 12);
- }
-
- for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
- SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
- DISubprogram SP(*SPI);
- raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()];
-
- Function *F = SP.getFunction();
- if (!F) continue;
- GCOVFunction Func(SP, os, Use402Format);
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
- TerminatorInst *TI = BB->getTerminator();
- if (int successors = TI->getNumSuccessors()) {
- for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (CU_Nodes) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ raw_fd_ostream *&out = GcnoFiles[CU];
+ std::string ErrorInfo;
+ out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!Use402Format) // 12-byte file header; the middle field is the version stamp.
+ out->write("oncg*404MVLL", 12);
+ else
+ out->write("oncg*402MVLL", 12); // "402" stamp, matching Use402Format.
+
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+ raw_fd_ostream *&os = GcnoFiles[CU];
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, os, Use402Format);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ }
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
+ }
}
- } else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func.getReturnBlock());
- }
-
- uint32_t Line = 0;
- for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
- const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
- if (Line == Loc.getLine()) continue;
- Line = Loc.getLine();
- if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue;
-
- GCOVLines &Lines = Block.getFile(SP.getFilename());
- Lines.addLine(Loc.getLine());
+ Func.writeOut();
}
}
- Func.writeOut();
}
for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
@@ -424,103 +415,107 @@ void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) {
}
}
-bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) {
- if (DIF.subprogram_begin() == DIF.subprogram_end())
- return false;
-
- SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
- for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
- SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
- DISubprogram SP(*SPI);
- Function *F = SP.getFunction();
- if (!F) continue;
-
- unsigned Edges = 0;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- if (isa<ReturnInst>(TI))
- ++Edges;
- else
- Edges += TI->getNumSuccessors();
- }
-
- ArrayType *CounterTy =
+bool GCOVProfiler::emitProfileArcs() {
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes) return false;
+
+ bool Result = false;
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ DIArray SPs = CU.getSubprograms();
+ SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ if (!Result) Result = true;
+ unsigned Edges = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (isa<ReturnInst>(TI))
+ ++Edges;
+ else
+ Edges += TI->getNumSuccessors();
+ }
+
+ ArrayType *CounterTy =
ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
- GlobalVariable *Counters =
+ GlobalVariable *Counters =
new GlobalVariable(*M, CounterTy, false,
GlobalValue::InternalLinkage,
Constant::getNullValue(CounterTy),
"__llvm_gcov_ctr", 0, false, 0);
- CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
-
- UniqueVector<BasicBlock *> ComplexEdgePreds;
- UniqueVector<BasicBlock *> ComplexEdgeSuccs;
-
- unsigned Edge = 0;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
- if (Successors) {
- IRBuilder<> Builder(TI);
-
- if (Successors == 1) {
- Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
- Edge);
- Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
- Builder.CreateStore(Count, Counter);
- } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- Value *Sel = Builder.CreateSelect(
+ CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
+
+ UniqueVector<BasicBlock *> ComplexEdgePreds;
+ UniqueVector<BasicBlock *> ComplexEdgeSuccs;
+
+ unsigned Edge = 0;
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+ if (Successors) {
+ IRBuilder<> Builder(TI);
+
+ if (Successors == 1) {
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ Edge);
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ Value *Sel = Builder.CreateSelect(
BI->getCondition(),
ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
- SmallVector<Value *, 2> Idx;
- Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
- Idx.push_back(Sel);
- Value *Counter = Builder.CreateInBoundsGEP(Counters,
- Idx.begin(), Idx.end());
- Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
- Builder.CreateStore(Count, Counter);
- } else {
- ComplexEdgePreds.insert(BB);
- for (int i = 0; i != Successors; ++i)
- ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+ SmallVector<Value *, 2> Idx;
+ Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+ Idx.push_back(Sel);
+ Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
+ Value *Count = Builder.CreateLoad(Counter);
+ Count = Builder.CreateAdd(Count,
+ ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Builder.CreateStore(Count, Counter);
+ } else {
+ ComplexEdgePreds.insert(BB);
+ for (int i = 0; i != Successors; ++i)
+ ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+ }
+ Edge += Successors;
}
- Edge += Successors;
}
- }
-
- if (!ComplexEdgePreds.empty()) {
- GlobalVariable *EdgeTable =
+
+ if (!ComplexEdgePreds.empty()) {
+ GlobalVariable *EdgeTable =
buildEdgeLookupTable(F, Counters,
ComplexEdgePreds, ComplexEdgeSuccs);
- GlobalVariable *EdgeState = getEdgeStateValue();
-
- Type *Int32Ty = Type::getInt32Ty(*Ctx);
- for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
- IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
- Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
- }
- for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
- // call runtime to perform increment
- IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI());
- Value *CounterPtrArray =
+ GlobalVariable *EdgeState = getEdgeStateValue();
+
+ Type *Int32Ty = Type::getInt32Ty(*Ctx);
+ for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
+ IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+ Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+ }
+ for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
+ // call runtime to perform increment
+ BasicBlock::iterator InsertPt =
+ ComplexEdgeSuccs[i+1]->getFirstInsertionPt();
+ IRBuilder<> Builder(InsertPt);
+ Value *CounterPtrArray =
Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
i * ComplexEdgePreds.size());
- Builder.CreateCall2(getIncrementIndirectCounterFunc(),
- EdgeState, CounterPtrArray);
- // clear the predecessor number
- Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState);
+ Builder.CreateCall2(getIncrementIndirectCounterFunc(),
+ EdgeState, CounterPtrArray);
+ // clear the predecessor number
+ Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState);
+ }
}
}
+ insertCounterWriteout(CountersBySP);
}
-
- insertCounterWriteout(DIF, CountersBySP);
-
- return true;
+ return Result;
}
// All edges with successors that aren't branches are "complex", because it
@@ -626,7 +621,6 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
}
void GCOVProfiler::insertCounterWriteout(
- DebugInfoFinder &DIF,
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) {
FunctionType *WriteoutFTy =
FunctionType::get(Type::getVoidTy(*Ctx), false);
@@ -642,29 +636,31 @@ void GCOVProfiler::insertCounterWriteout(
Constant *EmitArcs = getEmitArcsFunc();
Constant *EndFile = getEndFileFunc();
- for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(),
- CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) {
- DICompileUnit compile_unit(*CUI);
- std::string FilenameGcda = mangleName(compile_unit, "gcda");
- Builder.CreateCall(StartFile,
- Builder.CreateGlobalStringPtr(FilenameGcda));
- for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (CU_Nodes) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit compile_unit(CU_Nodes->getOperand(i));
+ std::string FilenameGcda = mangleName(compile_unit, "gcda");
+ Builder.CreateCall(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda));
+ for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
I = CountersBySP.begin(), E = CountersBySP.end();
- I != E; ++I) {
- DISubprogram SP(I->second);
- intptr_t ident = reinterpret_cast<intptr_t>(I->second);
- Builder.CreateCall2(EmitFunction,
- ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
- Builder.CreateGlobalStringPtr(SP.getName()));
-
- GlobalVariable *GV = I->first;
- unsigned Arcs =
+ I != E; ++I) {
+ DISubprogram SP(I->second);
+ intptr_t ident = reinterpret_cast<intptr_t>(I->second);
+ Builder.CreateCall2(EmitFunction,
+ ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
+ Builder.CreateGlobalStringPtr(SP.getName()));
+
+ GlobalVariable *GV = I->first;
+ unsigned Arcs =
cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
- Builder.CreateCall2(EmitArcs,
- ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
- Builder.CreateConstGEP2_64(GV, 0, 0));
+ Builder.CreateCall2(EmitArcs,
+ ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+ Builder.CreateConstGEP2_64(GV, 0, 0));
+ }
+ Builder.CreateCall(EndFile);
}
- Builder.CreateCall(EndFile);
}
Builder.CreateRetVoid();
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index c6147fa18f..23915d39f2 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -909,7 +909,7 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
pathNumber) {
if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
|| (((Instruction*)(pathNumber))->getParent()) != block) {
- return(block->getFirstNonPHI());
+ return(block->getFirstInsertionPt());
} else {
Instruction* pathNumberInst = (Instruction*) (pathNumber);
BasicBlock::iterator insertPoint;
@@ -930,7 +930,7 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
// A PHINode is created in the node, and its values initialized to -1U.
void PathProfiler::preparePHI(BLInstrumentationNode* node) {
BasicBlock* block = node->getBlock();
- BasicBlock::iterator insertPoint = block->getFirstNonPHI();
+ BasicBlock::iterator insertPoint = block->getFirstInsertionPt();
pred_iterator PB = pred_begin(node->getBlock()),
PE = pred_end(node->getBlock());
PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
@@ -999,7 +999,7 @@ void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
BasicBlock::iterator insertPoint;
if( atBeginning )
- insertPoint = block->getFirstNonPHI();
+ insertPoint = block->getFirstInsertionPt();
else
insertPoint = block->getTerminator();
@@ -1029,8 +1029,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue,
gepIndices[1] = incValue;
GetElementPtrInst* pcPointer =
- GetElementPtrInst::Create(dag->getCounterArray(),
- gepIndices.begin(), gepIndices.end(),
+ GetElementPtrInst::Create(dag->getCounterArray(), gepIndices,
"counterInc", insertPoint);
// Load from the array - call it oldPC
@@ -1140,7 +1139,7 @@ void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
}
BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getFirstInsertionPt() :
instrumentNode->getBlock()->getTerminator();
// add information from the bottom edge, if it exists
@@ -1172,7 +1171,7 @@ void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
// Insert instrumentation if this is a normal edge
else {
BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getFirstInsertionPt() :
instrumentNode->getBlock()->getTerminator();
if( edge->isInitialization() ) { // initialize path number
@@ -1233,7 +1232,7 @@ void PathProfiler::insertInstrumentation(
end = callEdges.end(); edge != end; edge++ ) {
BLInstrumentationNode* node =
(BLInstrumentationNode*)(*edge)->getSource();
- BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI();
+ BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt();
// Find the first function call
while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 0ebab33f5c..de57cd1734 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -51,8 +51,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
Constant::getNullValue(Type::getInt32Ty(Context)));
unsigned NumElements = 0;
if (Array) {
- Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0],
- GEPIndices.size());
+ Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices);
NumElements =
cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
} else {
@@ -108,7 +107,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
GlobalValue *CounterArray, bool beginning) {
// Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() :
+ BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() :
BB->getTerminator();
while (isa<AllocaInst>(InsertPos))
++InsertPos;
@@ -120,7 +119,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
Constant *ElementPtr =
- ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size());
+ ConstantExpr::getGetElementPtr(CounterArray, Indices);
// Load, increment and store the value back.
Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index a5adb5e7ce..ba214d1a33 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -57,6 +57,7 @@ bool ADCE::runOnFunction(Function& F) {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isa<TerminatorInst>(I.getInstructionIterator()) ||
isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
+ isa<LandingPadInst>(I.getInstructionIterator()) ||
I->mayHaveSideEffects()) {
alive.insert(I.getInstructionIterator());
worklist.push_back(I.getInstructionIterator());
@@ -65,7 +66,6 @@ bool ADCE::runOnFunction(Function& F) {
// Propagate liveness backwards to operands.
while (!worklist.empty()) {
Instruction* curr = worklist.pop_back_val();
-
for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
OI != OE; ++OI)
if (Instruction* Inst = dyn_cast<Instruction>(OI))
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index c223da60e0..a6f0cf3239 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMScalarOpts
DCE.cpp
DeadStoreElimination.cpp
EarlyCSE.cpp
+ GlobalMerge.cpp
GVN.cpp
IndVarSimplify.cpp
JumpThreading.cpp
@@ -29,6 +30,14 @@ add_llvm_library(LLVMScalarOpts
SimplifyCFGPass.cpp
SimplifyLibCalls.cpp
Sink.cpp
- TailDuplication.cpp
TailRecursionElimination.cpp
)
+
+add_llvm_library_dependencies(LLVMScalarOpts
+ LLVMAnalysis
+ LLVMCore
+ LLVMInstCombine
+ LLVMSupport
+ LLVMTarget
+ LLVMTransformUtils
+ )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 17beeb5aa8..f8f18b2173 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -58,6 +58,7 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
@@ -110,6 +111,7 @@ namespace {
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
bool DupRetToEnableTailCallOpts(ReturnInst *RI);
+ bool PlaceDbgValues(Function &F);
};
}
@@ -132,6 +134,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+ // If llvm.dbg.value is far away from the value, then iSel may not be able
+ // to handle it properly. iSel will drop llvm.dbg.value if it cannot
+ // find a node corresponding to the value.
+ EverMadeChange |= PlaceDbgValues(F);
+
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
@@ -410,8 +417,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
CastInst *&InsertedCast = InsertedCasts[UserBB];
if (!InsertedCast) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
@@ -467,8 +473,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
CmpInst *&InsertedCmp = InsertedCmps[UserBB];
if (!InsertedCmp) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedCmp =
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
@@ -551,22 +556,6 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
- // llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
- // find a node corresponding to the value.
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI))
- if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()))
- if (!VI->isTerminator() &&
- (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) {
- DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(VI->getParent()->getFirstNonPHI());
- else
- DVI->insertAfter(VI);
- return true;
- }
-
// We'll need TargetData from here on out.
const TargetData *TD = TLI ? TLI->getTargetData() : 0;
if (!TD) return false;
@@ -746,13 +735,11 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
worklist.pop_back();
// Break use-def graph loops.
- if (Visited.count(V)) {
+ if (!Visited.insert(V)) {
Consensus = 0;
break;
}
- Visited.insert(V);
-
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
@@ -763,7 +750,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode =
- AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
+ AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
NewAddrModeInsts, *TLI);
// This check is broken into two cases with very similar code to avoid using
@@ -822,7 +809,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Insert this computation right after this user. Since our caller is
// scanning from the top of the BB to the bottom, reuse of the expr are
// guaranteed to happen later.
- BasicBlock::iterator InsertPt = MemoryInst;
+ IRBuilder<> Builder(MemoryInst);
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
@@ -833,7 +820,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
+ SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
@@ -850,10 +837,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (AddrMode.BaseReg) {
Value *V = AddrMode.BaseReg;
if (V->getType()->isPointerTy())
- V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
if (V->getType() != IntPtrTy)
- V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
- "sunkaddr", InsertPt);
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
Result = V;
}
@@ -863,29 +849,27 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (V->getType() == IntPtrTy) {
// done.
} else if (V->getType()->isPointerTy()) {
- V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
cast<IntegerType>(V->getType())->getBitWidth()) {
- V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
} else {
- V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
+ V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
}
if (AddrMode.Scale != 1)
- V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
- AddrMode.Scale),
- "sunkaddr", InsertPt);
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
// Add in the BaseGV if present.
if (AddrMode.BaseGV) {
- Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
- InsertPt);
+ Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
@@ -894,7 +878,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (Result)
- Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Result = V;
}
@@ -902,7 +886,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (Result == 0)
SunkAddr = Constant::getNullValue(Addr->getType());
else
- SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
+ SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
}
MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
@@ -1059,8 +1043,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
if (!InsertedTrunc) {
- BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
-
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
}
@@ -1159,3 +1142,34 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
return MadeChange;
}
+
+// PlaceDbgValues - Move each llvm.dbg.value next to the instruction defining
+// its value. If it is far away, iSel may not be able to handle it properly and
+// will drop it when it cannot find a node corresponding to the value.
+bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+ bool MadeChange = false;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ Instruction *PrevNonDbgInst = NULL;
+ for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+ Instruction *Insn = BI; ++BI; // Advance first; Insn may be moved below.
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+ if (!DVI) {
+ PrevNonDbgInst = Insn;
+ continue;
+ }
+
+ Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+ if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+ DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI)) // Keep the PHI group intact: insert after it.
+ DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ }
+ }
+ return MadeChange;
+}
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e6089a9a43..a593d0f446 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -52,18 +52,18 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
DominatorTree &DT = getAnalysis<DominatorTree>();
-
+
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
if (DT.isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
-
+
AA = 0; MD = 0;
return Changed;
}
-
+
bool runOnBasicBlock(BasicBlock &BB);
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
@@ -105,34 +105,34 @@ static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
SmallPtrSet<Value*, 16> *ValueSet = 0) {
SmallVector<Instruction*, 32> NowDeadInsts;
-
+
NowDeadInsts.push_back(I);
--NumFastOther;
-
+
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
++NumFastOther;
-
+
// This instruction is dead, zap it, in stages. Start by removing it from
// MemDep, which needs to know the operands and needs it to be in the
// function.
MD.removeInstruction(DeadInst);
-
+
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
DeadInst->setOperand(op, 0);
-
+
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
-
+
if (Instruction *OpI = dyn_cast<Instruction>(Op))
if (isInstructionTriviallyDead(OpI))
NowDeadInsts.push_back(OpI);
}
-
+
DeadInst->eraseFromParent();
-
+
if (ValueSet) ValueSet->erase(DeadInst);
} while (!NowDeadInsts.empty());
}
@@ -159,11 +159,13 @@ static bool hasMemoryWrite(Instruction *I) {
}
/// getLocForWrite - Return a Location stored to by the specified instruction.
+/// If isRemovable returns true, this function and getLocForRead completely
+/// describe the memory operations for this instruction.
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return AA.getLocation(SI);
-
+
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
// memcpy/memmove/memset.
AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
@@ -174,10 +176,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
return AliasAnalysis::Location();
return Loc;
}
-
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
if (II == 0) return AliasAnalysis::Location();
-
+
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
case Intrinsic::init_trampoline:
@@ -185,7 +187,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// that we should use the size of the pointee type. This isn't valid for
// init.trampoline, which writes more than an i8.
if (AA.getTargetData() == 0) return AliasAnalysis::Location();
-
+
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
return AliasAnalysis::Location(II->getArgOperand(0));
@@ -198,10 +200,10 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
/// instruction if any.
-static AliasAnalysis::Location
+static AliasAnalysis::Location
getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
assert(hasMemoryWrite(Inst) && "Unknown instruction case");
-
+
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
@@ -213,10 +215,10 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
/// isRemovable - If the value of this instruction and the memory it writes to
/// is unused, may we delete this instruction?
static bool isRemovable(Instruction *I) {
- // Don't remove volatile stores.
+ // Don't remove volatile/atomic stores.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return !SI->isVolatile();
-
+ return SI->isUnordered();
+
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
@@ -227,7 +229,7 @@ static bool isRemovable(Instruction *I) {
case Intrinsic::init_trampoline:
// Always safe to remove init_trampoline.
return true;
-
+
case Intrinsic::memset:
case Intrinsic::memmove:
case Intrinsic::memcpy:
@@ -255,14 +257,14 @@ static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
const TargetData *TD = AA.getTargetData();
if (TD == 0)
return AliasAnalysis::UnknownSize;
-
+
if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
// Get size information for the alloca
if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
return AliasAnalysis::UnknownSize;
}
-
+
assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
PointerType *PT = cast<PointerType>(V->getType());
return TD->getTypeAllocSize(PT->getElementType());
@@ -287,7 +289,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
AliasAnalysis &AA) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
-
+
// If the start pointers are the same, we just have to compare sizes to see if
// the later store was larger than the earlier store.
if (P1 == P2) {
@@ -302,33 +304,33 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
return Later.Ptr->getType() == Earlier.Ptr->getType();
return false;
}
-
+
// Make sure that the Later size is >= the Earlier size.
if (Later.Size < Earlier.Size)
return false;
return true;
}
-
+
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
Earlier.Size == AliasAnalysis::UnknownSize ||
Later.Size <= Earlier.Size || AA.getTargetData() == 0)
return false;
-
+
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval argument). If so, then it clearly overwrites any
// other store to the same object.
const TargetData &TD = *AA.getTargetData();
-
+
const Value *UO1 = GetUnderlyingObject(P1, &TD),
*UO2 = GetUnderlyingObject(P2, &TD);
-
+
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
return false;
-
+
// If the "Later" store is to a recognizable object, get its size.
if (isObjectPointerWithTrustworthySize(UO2)) {
uint64_t ObjectSize =
@@ -336,26 +338,26 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
if (ObjectSize == Later.Size)
return true;
}
-
+
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
int64_t EarlierOff = 0, LaterOff = 0;
const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
-
+
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
return false;
// The later store completely overlaps the earlier store if:
- //
+ //
// 1. Both start at the same offset and the later one's size is greater than
// or equal to the earlier one's, or
//
// |--earlier--|
// |-- later --|
- //
+ //
// 2. The earlier store has an offset greater than the later offset, but which
// still lies completely within the later store.
//
@@ -373,7 +375,7 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its
-/// input dead in the traditional sense. Consider this case:
+/// input dead in the traditional sense. Consider this case:
///
/// memcpy(A <- B)
/// memcpy(A <- A)
@@ -391,10 +393,10 @@ static bool isPossibleSelfRead(Instruction *Inst,
// location read.
AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
-
+
// If the read and written loc obviously don't alias, it isn't a read.
if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
-
+
// Okay, 'Inst' may copy over itself. However, we can still remove a the
// DepWrite instruction if we can prove that it reads from the same location
// as Inst. This handles useful cases like:
@@ -404,10 +406,10 @@ static bool isPossibleSelfRead(Instruction *Inst,
// aliases, so removing the first memcpy is safe (assuming it writes <= #
// bytes as the second one.
AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
-
+
if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
return false;
-
+
// If DepWrite doesn't read memory or if we can't prove it is a must alias,
// then it can't be considered dead.
return true;
@@ -420,43 +422,43 @@ static bool isPossibleSelfRead(Instruction *Inst,
bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool MadeChange = false;
-
+
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
Instruction *Inst = BBI++;
-
+
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(Inst)) {
MadeChange |= HandleFree(F);
continue;
}
-
+
// If we find something that writes memory, get its memory dependence.
if (!hasMemoryWrite(Inst))
continue;
MemDepResult InstDep = MD->getDependency(Inst);
-
+
// Ignore any store where we can't find a local dependence.
// FIXME: cross-block DSE would be fun. :)
- if (InstDep.isNonLocal() || InstDep.isUnknown())
+ if (!InstDep.isDef() && !InstDep.isClobber())
continue;
-
+
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+ SI->getOperand(0) == DepLoad && isRemovable(SI)) {
DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
<< "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
-
+
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
-
+
DeleteDeadInstruction(SI, *MD);
-
+
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
@@ -467,15 +469,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
}
}
-
+
// Figure out what location is being stored to.
AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
// If we didn't get a useful location, fail.
if (Loc.Ptr == 0)
continue;
-
- while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
+
+ while (InstDep.isDef() || InstDep.isClobber()) {
// Get the memory clobbered by the instruction we depend on. MemDep will
// skip any instructions that 'Loc' clearly doesn't interact with. If we
// end up depending on a may- or must-aliased load, then we can't optimize
@@ -496,12 +498,12 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
-
+
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepWrite, *MD);
++NumFastStores;
MadeChange = true;
-
+
// DeleteDeadInstruction can delete the current instruction in loop
// cases, reset BBI.
BBI = Inst;
@@ -509,7 +511,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
--BBI;
break;
}
-
+
// If this is a may-aliased store that is clobbering the store value, we
// can keep searching past it for another must-aliased pointer that stores
// to the same location. For example, in:
@@ -519,20 +521,20 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// we can remove the first store to P even though we don't know if P and Q
// alias.
if (DepWrite == &BB.front()) break;
-
+
// Can't look past this instruction if it might read 'Loc'.
if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
break;
-
+
InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
}
}
-
+
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
MadeChange |= handleEndBlock(BB);
-
+
return MadeChange;
}
@@ -543,18 +545,18 @@ bool DSE::HandleFree(CallInst *F) {
MemDepResult Dep = MD->getDependency(F);
- while (!Dep.isNonLocal() && !Dep.isUnknown()) {
+ while (Dep.isDef() || Dep.isClobber()) {
Instruction *Dependency = Dep.getInst();
if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
return MadeChange;
-
+
Value *DepPointer =
GetUnderlyingObject(getStoredPointerOperand(Dependency));
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
return MadeChange;
-
+
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency, *MD);
++NumFastStores;
@@ -567,7 +569,7 @@ bool DSE::HandleFree(CallInst *F) {
// free(s);
Dep = MD->getDependency(F);
};
-
+
return MadeChange;
}
@@ -579,28 +581,28 @@ bool DSE::HandleFree(CallInst *F) {
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
bool MadeChange = false;
-
+
// Keep track of all of the stack objects that are dead at the end of the
// function.
SmallPtrSet<Value*, 16> DeadStackObjects;
-
+
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
DeadStackObjects.insert(AI);
-
+
// Treat byval arguments the same, stores to them are dead at the end of the
// function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
AE = BB.getParent()->arg_end(); AI != AE; ++AI)
if (AI->hasByValAttr())
DeadStackObjects.insert(AI);
-
+
// Scan the basic block backwards
for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
--BBI;
-
+
// If we find a store, check to see if it points into a dead stack value.
if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
@@ -609,10 +611,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Stores to stack values are valid candidates for removal.
if (DeadStackObjects.count(Pointer)) {
Instruction *Dead = BBI++;
-
+
DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
<< *Dead << "\n Object: " << *Pointer << '\n');
-
+
// DCE instructions only used to calculate that store.
DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
++NumFastStores;
@@ -620,7 +622,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
}
-
+
// Remove any dead non-memory-mutating instructions.
if (isInstructionTriviallyDead(BBI)) {
Instruction *Inst = BBI++;
@@ -629,55 +631,61 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
MadeChange = true;
continue;
}
-
+
if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
DeadStackObjects.erase(A);
continue;
}
-
+
if (CallSite CS = cast<Value>(BBI)) {
// If this call does not access memory, it can't be loading any of our
// pointers.
if (AA->doesNotAccessMemory(CS))
continue;
-
+
// If the call might load from any of our allocas, then any store above
// the call is live.
SmallVector<Value*, 8> LiveAllocas;
for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
// See if the call site touches it.
- AliasAnalysis::ModRefResult A =
+ AliasAnalysis::ModRefResult A =
AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
-
+
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
LiveAllocas.push_back(*I);
}
-
+
for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
E = LiveAllocas.end(); I != E; ++I)
DeadStackObjects.erase(*I);
-
+
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
if (DeadStackObjects.empty())
return MadeChange;
-
+
continue;
}
-
+
AliasAnalysis::Location LoadedLoc;
-
+
// If we encounter a use of the pointer, it is no longer considered dead
if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ if (!L->isUnordered()) // Be conservative with atomic/volatile load
+ break;
LoadedLoc = AA->getLocation(L);
} else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
LoadedLoc = AA->getLocation(V);
} else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
LoadedLoc = AA->getLocationForSource(MTI);
- } else {
- // Not a loading instruction.
+ } else if (!BBI->mayReadFromMemory()) {
+ // Instruction doesn't read memory. Note that stores that weren't removed
+ // above will hit this case.
continue;
+ } else {
+ // Unknown inst; assume it clobbers everything.
+ break;
}
// Remove any allocas from the DeadPointer set that are loaded, as this
@@ -689,7 +697,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (DeadStackObjects.empty())
break;
}
-
+
return MadeChange;
}
@@ -703,14 +711,14 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
// A constant can't be in the dead pointer set.
if (isa<Constant>(UnderlyingPointer))
return;
-
+
// If the kill pointer can be easily reduced to an alloca, don't bother doing
// extraneous AA queries.
if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
return;
}
-
+
SmallVector<Value*, 16> NowLive;
for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 3d3f17b26f..c0223d2bf1 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -92,7 +92,7 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
// Hash in all of the operands as pointers.
unsigned Res = 0;
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
- Res ^= getHash(Inst->getOperand(i)) << i;
+ Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
if (CastInst *CI = dyn_cast<CastInst>(Inst))
Res ^= getHash(CI->getType());
@@ -185,7 +185,7 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
"Cannot value number calls with metadata operands");
- Res ^= getHash(Inst->getOperand(i)) << i;
+ Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
}
// Mix in the opcode.
@@ -357,7 +357,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If this is a non-volatile load, process it.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Ignore volatile loads.
- if (LI->isVolatile()) {
+ if (!LI->isSimple()) {
LastStore = 0;
continue;
}
@@ -437,7 +437,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
// Remember that this was the last store we saw for DSE.
- if (!SI->isVolatile())
+ if (SI->isSimple())
LastStore = SI;
}
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index b4d5667bfa..a51cbb631b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -41,12 +41,16 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/PatternMatch.h"
using namespace llvm;
+using namespace PatternMatch;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumGVNSimpl, "Number of instructions simplified");
+STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
@@ -548,6 +552,9 @@ namespace {
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
+ unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
+ BasicBlock *Root);
+ bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root);
};
char GVN::ID = 0;
@@ -689,8 +696,8 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
- uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
- uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
+ uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
@@ -920,7 +927,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
@@ -946,10 +953,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (SrcVal->getType()->isPointerTy())
- SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
+ SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx));
if (!SrcVal->getType()->isIntegerTy())
- SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
- "tmp");
+ SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
@@ -959,11 +965,10 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
if (ShiftAmt)
- SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
+ SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
if (LoadSize != StoreSize)
- SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
- "tmp");
+ SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
@@ -982,8 +987,8 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
if (Offset+LoadSize > SrcValSize) {
- assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
- assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+ assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
// If we have a load/load clobber an DepLI can be widened to cover this
// load, then we should widen it to the next power of 2 size big enough!
unsigned NewLoadSize = Offset+LoadSize;
@@ -1081,7 +1086,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
llvm::Type::getInt8PtrTy(Src->getContext()));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
@@ -1274,7 +1279,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
+ if (Deps.size() == 1
+ && !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber())
+ {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
@@ -1294,7 +1301,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
- if (DepInfo.isUnknown()) {
+ if (!DepInfo.isDef() && !DepInfo.isClobber()) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1359,7 +1366,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
continue;
}
- assert(DepInfo.isDef() && "Expecting def here");
+ // DepInfo.isDef() here
Instruction *DepInst = DepInfo.getInst();
@@ -1446,8 +1453,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
Blockers.insert(UnavailableBlocks[i]);
- // Lets find first basic block with more than one predecessor. Walk backwards
- // through predecessors if needed.
+ // Let's find the first basic block with more than one predecessor. Walk
+ // backwards through predecessors if needed.
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
@@ -1519,10 +1526,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
<< Pred->getName() << "': " << *LI << '\n');
return false;
}
+
+ if (LoadBB->isLandingPad()) {
+ DEBUG(dbgs()
+ << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
}
}
+
if (!NeedToSplit.empty()) {
toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
return false;
@@ -1660,7 +1676,7 @@ bool GVN::processLoad(LoadInst *L) {
if (!MD)
return false;
- if (L->isVolatile())
+ if (!L->isSimple())
return false;
if (L->use_empty()) {
@@ -1747,7 +1763,11 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
- if (Dep.isUnknown()) {
+ // If it is defined in another block, try harder.
+ if (Dep.isNonLocal())
+ return processNonLocalLoad(L);
+
+ if (!Dep.isDef()) {
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
@@ -1757,12 +1777,6 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
- // If it is defined in another block, try harder.
- if (Dep.isNonLocal())
- return processNonLocalLoad(L);
-
- assert(Dep.isDef() && "Expecting def here");
-
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getValueOperand();
@@ -1874,6 +1888,138 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
return Val;
}
+/// replaceAllDominatedUsesWith - Rewrite each use of 'From' to refer to 'To'
+/// whenever the using instruction sits in a block dominated by 'Root'. The
+/// return value is the number of uses that were rewritten.
+unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
+                                          BasicBlock *Root) {
+  unsigned NumReplaced = 0;
+  Value::use_iterator It = From->use_begin(), End = From->use_end();
+  while (It != End) {
+    // Read the user and operand slot, then advance past this use.
+    Instruction *UserInst = cast<Instruction>(*It);
+    unsigned OperandIdx = It.getOperandNo();
+    ++It;
+    if (!DT->dominates(Root, UserInst->getParent()))
+      continue;
+    UserInst->setOperand(OperandIdx, To);
+    ++NumReplaced;
+  }
+  return NumReplaced;
+}
+
+/// propagateEquality - 'LHS' and 'RHS' are known to hold the same value in
+/// every block that 'Root' dominates. Take advantage of that, e.g. by
+/// substituting 'RHS' for 'LHS' throughout that scope. The result says whether
+/// anything was changed.
+bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
+  // Identical values carry no information.
+  if (LHS == RHS)
+    return false;
+  assert(LHS->getType() == RHS->getType() && "Equal but types differ!");
+
+  // Exactly one side must be a constant. Constant-vs-constant equalities are
+  // not propagated at all. When neither side is constant we give up too; that
+  // is not required for correctness, but the case is much less useful: it
+  // comes up just as often as the constant case yet handling it hardly ever
+  // yields an improvement.
+  const bool LHSIsConstant = isa<Constant>(LHS);
+  const bool RHSIsConstant = isa<Constant>(RHS);
+  if (LHSIsConstant == RHSIsConstant)
+    return false;
+
+  // Canonicalize so the constant ends up on the right-hand side. In general
+  // the best results come from having the longest lived value as the RHS.
+  if (LHSIsConstant)
+    std::swap(LHS, RHS);
+
+  // Should value numbering later work out that some instruction in the scope
+  // equals 'LHS', make sure it gets turned into 'RHS'.
+  addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root);
+
+  // Substitute 'RHS' for 'LHS' everywhere in the scope. LHS always has at
+  // least one use that Root does not dominate, so with a single use there is
+  // nothing to replace and the walk is skipped.
+  bool Changed = false;
+  if (!LHS->hasOneUse()) {
+    const unsigned Replaced = replaceAllDominatedUsesWith(LHS, RHS, Root);
+    NumGVNEqProp += Replaced;
+    Changed = Replaced != 0;
+  }
+
+  // Next, look for further equalities implied by this one. For instance,
+  // knowing "(A != B)" == "false" tells us A and B are equal in the scope.
+  // Only boolean equalities whose RHS is an explicit true or false are
+  // handled at present.
+  if (!RHS->getType()->isIntegerTy(1))
+    return Changed; // Not a boolean equality.
+  ConstantInt *BoolRHS = dyn_cast<ConstantInt>(RHS);
+  if (!BoolRHS)
+    return Changed; // RHS is neither 'true' nor 'false'.
+  // An RHS that is not 'true' is 'false'.
+  const bool RHSIsTrue = BoolRHS->isAllOnesValue();
+
+  // "X && Y" known true means X and Y are both known true; "X || Y" known
+  // false means X and Y are both known false.
+  Value *X, *Y;
+  const bool IsAndOrMatch =
+      RHSIsTrue ? match(LHS, m_And(m_Value(X), m_Value(Y)))
+                : match(LHS, m_Or(m_Value(X), m_Value(Y)));
+  if (IsAndOrMatch) {
+    const bool ChangedX = propagateEquality(X, RHS, Root);
+    const bool ChangedY = propagateEquality(Y, RHS, Root);
+    return Changed || ChangedX || ChangedY;
+  }
+
+  // For "(X == Y)" == "true", and likewise "(X != Y)" == "false", propagate
+  // X == Y as well. Only equality comparisons are supported.
+  ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS);
+  if (!Cmp)
+    return Changed;
+  const CmpInst::Predicate ImpliesEqual =
+      RHSIsTrue ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
+  if (Cmp->getPredicate() == ImpliesEqual) {
+    Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
+    Changed |= propagateEquality(CmpLHS, CmpRHS, Root);
+  }
+  return Changed;
+}
+
+/// isOnlyReachableViaThisEdge - 'Src' has an edge to 'Dst'. Returns true when
+/// every path from the entry block to 'Dst' has to go through that edge. In
+/// particular, 'Src' must not have a second edge leading to 'Dst'.
+static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
+                                       DominatorTree *DT) {
+  // Two conditions are checked while walking the predecessors of Dst:
+  //  - Src shows up as a predecessor exactly once, i.e. there is a single
+  //    edge from Src to Dst rather than several;
+  //  - every other predecessor is dominated by Dst (see below for why).
+  bool FoundSrcEdge = false;
+  pred_iterator PredIt = pred_begin(Dst), PredEnd = pred_end(Dst);
+  for (; PredIt != PredEnd; ++PredIt) {
+    BasicBlock *PredBB = *PredIt;
+    if (PredBB != Src) {
+      // A predecessor that Dst does not dominate can be reached either
+      // without going through Src at all (so not via the edge), or through
+      // Src but leaving it by a different edge. In both cases Dst is
+      // reachable without using the edge, so fail.
+      if (!DT->dominates(Dst, PredBB))
+        return false;
+      continue;
+    }
+
+    // This is an edge from Src to Dst; a second one means failure.
+    if (FoundSrcEdge)
+      return false;
+    FoundSrcEdge = true;
+  }
+  assert(FoundSrcEdge && "No edge between these basic blocks!");
+
+  // Any path from the entry block to Dst must at some point enter Dst from a
+  // predecessor that Dst does not dominate. Src is the only such predecessor
+  // and it has a single edge to Dst, so the path uses that edge.
+  return true;
+}
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
@@ -1891,6 +2037,7 @@ bool GVN::processInstruction(Instruction *I) {
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(I);
+ ++NumGVNSimpl;
return true;
}
@@ -1903,30 +2050,45 @@ bool GVN::processInstruction(Instruction *I) {
return false;
}
- // For conditions branches, we can perform simple conditional propagation on
+ // For conditional branches, we can perform simple conditional propagation on
// the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
-
+
Value *BranchCond = BI->getCondition();
- uint32_t CondVN = VN.lookup_or_add(BranchCond);
-
+
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
-
- if (TrueSucc->getSinglePredecessor())
- addToLeaderTable(CondVN,
- ConstantInt::getTrue(TrueSucc->getContext()),
- TrueSucc);
- if (FalseSucc->getSinglePredecessor())
- addToLeaderTable(CondVN,
- ConstantInt::getFalse(TrueSucc->getContext()),
- FalseSucc);
-
- return false;
+ BasicBlock *Parent = BI->getParent();
+ bool Changed = false;
+
+ if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT))
+ Changed |= propagateEquality(BranchCond,
+ ConstantInt::getTrue(TrueSucc->getContext()),
+ TrueSucc);
+
+ if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT))
+ Changed |= propagateEquality(BranchCond,
+ ConstantInt::getFalse(FalseSucc->getContext()),
+ FalseSucc);
+
+ return Changed;
}
-
+
+ // For switches, propagate the case values into the case destinations.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ Value *SwitchCond = SI->getCondition();
+ BasicBlock *Parent = SI->getParent();
+ bool Changed = false;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
+ BasicBlock *Dst = SI->getSuccessor(i);
+ if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
+ Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst);
+ }
+ return Changed;
+ }
+
// Instructions with void type don't return a value, so there's
// no point in trying to find redudancies in them.
if (I->getType()->isVoidTy()) return false;
@@ -2071,6 +2233,9 @@ bool GVN::performPRE(Function &F) {
// Nothing to PRE in the entry block.
if (CurrentBlock == &F.getEntryBlock()) continue;
+ // Don't perform PRE on a landing pad.
+ if (CurrentBlock->isLandingPad()) continue;
+
for (BasicBlock::iterator BI = CurrentBlock->begin(),
BE = CurrentBlock->end(); BI != BE; ) {
Instruction *CurInst = BI++;
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
new file mode 100644
index 0000000000..0772b48729
--- /dev/null
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -0,0 +1,226 @@
+//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into a bigger one can be addressed using offsets
+// from the same base pointer (no need for a separate base pointer for each
+// global). Such a transformation can significantly reduce the register pressure
+// when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of 3 arrays should be kept in the registers, thus
+// this code has quite large register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// Pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
+// ===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "global-merge"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMerged , "Number of globals merged");
+namespace {
+ class GlobalMerge : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit GlobalMerge(const TargetLowering *tli = 0)
+ : FunctionPass(ID), TLI(tli) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td) : TD(td) { }
+
+ bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char GlobalMerge::ID = 0;
+INITIALIZE_PASS(GlobalMerge, "global-merge",
+ "Global Merge", false, false)
+
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; j != e; ++j) {
+ Type *Ty = Globals[j]->getType()->getElementType();
+ MergedSize += TD->getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ }
+
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "_MergedGlobals");
+ for (size_t k = i; k < j; ++k) {
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ NumMerged++;
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+
+bool GlobalMerge::doInitialization(Module &M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ unsigned Alignment = I->getAlignment();
+ Type *Ty = I->getType()->getElementType();
+ if (Alignment > TD->getABITypeAlignment(Ty))
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ if (TD->getTypeAllocSize(Ty) < MaxOffset) {
+ const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
+ if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+ BSSGlobals.push_back(I);
+ else if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+ if (BSSGlobals.size() > 1)
+ Changed |= doMerge(BSSGlobals, M, false);
+
+ // FIXME: This currently breaks the EH processing due to way how the
+ // typeinfo detection works. We might want to detect the TIs and ignore
+ // them in the future.
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool GlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
+ return new GlobalMerge(tli);
+}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 50140d9259..75fa011a14 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -11,17 +11,6 @@
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
-// This transformation makes the following changes to each loop with an
-// identifiable induction variable:
-// 1. All loops are transformed to have a SINGLE canonical induction variable
-// which starts at zero and steps by one.
-// 2. The canonical induction variable is guaranteed to be the first PHI node
-// in the loop header block.
-// 3. The canonical induction variable is guaranteed to be in a wide enough
-// type so that IV expressions need not be (directly) zero-extended or
-// sign-extended.
-// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
-//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
// 1. The exit condition for the loop is canonicalized to compare the
@@ -33,9 +22,6 @@
// purpose of the loop is to compute the exit value of some derived
// expression, this transformation will make the loop dead.
//
-// This transformation should be followed by strength reduction after all of the
-// desired loop transformations have been performed.
-//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "indvars"
@@ -57,11 +43,11 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumRemoved , "Number of aux indvars removed");
@@ -69,21 +55,21 @@ STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumInserted , "Number of canonical indvars added");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
-STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-static cl::opt<bool> DisableIVRewrite(
- "disable-iv-rewrite", cl::Hidden,
- cl::desc("Disable canonical induction variable rewriting"));
-
-// Temporary flag for use with -disable-iv-rewrite to force a canonical IV for
-// LFTR purposes.
-static cl::opt<bool> ForceLFTR(
- "force-lftr", cl::Hidden,
- cl::desc("Enable forced linear function test replacement"));
+namespace llvm {
+ cl::opt<bool> EnableIVRewrite(
+ "enable-iv-rewrite", cl::Hidden,
+ cl::desc("Enable canonical induction variable rewriting"));
+
+ // Trip count verification can be enabled by default under NDEBUG if we
+ // implement a strong expression equivalence checker in SCEV. Until then, we
+ // use the verify-indvars flag, which may assert in some cases.
+ cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
+}
namespace {
class IndVarSimplify : public LoopPass {
@@ -111,12 +97,12 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
AU.addPreserved<IVUsers>();
AU.setPreservesCFG();
}
@@ -131,18 +117,9 @@ namespace {
void HandleFloatingPointIV(Loop *L, PHINode *PH);
void RewriteNonIntegerIVs(Loop *L);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
- void SimplifyIVUsers(SCEVExpander &Rewriter);
- void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
-
- bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
- void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
- void EliminateIVRemainder(BinaryOperator *Rem,
- Value *IVOperand,
- bool IsSigned);
+ void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
- void SimplifyCongruentIVs(Loop *L);
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
@@ -240,8 +217,8 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
InsertPt = InsertBB->getTerminator();
}
assert(InsertPt && "Missing phi operand");
- assert(!isa<Instruction>(Def) ||
- DT->dominates(cast<Instruction>(Def), InsertPt) &&
+ assert((!isa<Instruction>(Def) ||
+ DT->dominates(cast<Instruction>(Def), InsertPt)) &&
"def does not dominate all uses");
return InsertPt;
}
@@ -372,14 +349,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Positive and negative strides have different safety conditions.
if (IncValue > 0) {
// If we have a positive stride, we require the init to be less than the
- // exit value and an equality or less than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+ // exit value.
+ if (InitValue >= ExitValue)
return;
uint32_t Range = uint32_t(ExitValue-InitValue);
- if (NewPred == CmpInst::ICMP_SLE) {
- // Normalize SLE -> SLT, check for infinite loop.
+ // Check for infinite loop, either:
+ // while (i <= Exit) or until (i > Exit)
+ if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
if (++Range == 0) return; // Range overflows.
}
@@ -399,14 +376,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
} else {
// If we have a negative stride, we require the init to be greater than the
- // exit value and an equality or greater than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+ // exit value.
+ if (InitValue <= ExitValue)
return;
uint32_t Range = uint32_t(InitValue-ExitValue);
- if (NewPred == CmpInst::ICMP_SGE) {
- // Normalize SGE -> SGT, check for infinite loop.
+ // Check for infinite loop, either:
+ // while (i >= Exit) or until (i < Exit)
+ if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
if (++Range == 0) return; // Range overflows.
}
@@ -464,7 +441,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// platforms.
if (WeakPH) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- PN->getParent()->getFirstNonPHI());
+ PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN);
}
@@ -472,6 +449,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Add a new IVUsers entry for the newly-created integer PHI.
if (IU)
IU->AddUsersIfInteresting(NewPHI);
+
+ Changed = true;
}
void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
@@ -617,45 +596,15 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
//===----------------------------------------------------------------------===//
// Rewrite IV users based on a canonical IV.
-// To be replaced by -disable-iv-rewrite.
+// Only for use with -enable-iv-rewrite.
//===----------------------------------------------------------------------===//
-/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
-/// loop. IVUsers is treated as a worklist. Each successive simplification may
-/// push more users which may themselves be candidates for simplification.
-///
-/// This is the old approach to IV simplification to be replaced by
-/// SimplifyIVUsersNoRewrite.
-///
-void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
- // Each round of simplification involves a round of eliminating operations
- // followed by a round of widening IVs. A single IVUsers worklist is used
- // across all rounds. The inner loop advances the user. If widening exposes
- // more uses, then another pass through the outer loop is triggered.
- for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
- Instruction *UseInst = I->getUser();
- Value *IVOperand = I->getOperandValToReplace();
-
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- continue;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned);
- continue;
- }
- }
- }
-}
-
-// FIXME: It is an extremely bad idea to indvar substitute anything more
-// complex than affine induction variables. Doing so will put expensive
-// polynomial evaluations inside of the loop, and the str reduction pass
-// currently can only reduce affine polynomials. For now just disable
-// indvar subst on anything more complex than an affine addrec, unless
-// it can be expanded to a trivial value.
+/// FIXME: It is an extremely bad idea to indvar substitute anything more
+/// complex than affine induction variables. Doing so will put expensive
+/// polynomial evaluations inside of the loop, and the str reduction pass
+/// currently can only reduce affine polynomials. For now just disable
+/// indvar subst on anything more complex than an affine addrec, unless
+/// it can be expanded to a trivial value.
static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
// Loop-invariant values are safe.
if (SE->isLoopInvariant(S, L)) return true;
@@ -666,7 +615,8 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
return AR->isAffine();
// An add is safe it all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ if (const SCEVCommutativeExpr *Commutative
+ = dyn_cast<SCEVCommutativeExpr>(S)) {
for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
E = Commutative->op_end(); I != E; ++I)
if (!isSafe(*I, L, SE)) return false;
@@ -771,18 +721,37 @@ namespace {
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
+ PHINode *NarrowIV;
Type *WidestNativeType; // Widest integer type created [sz]ext
- bool IsSigned; // Was an sext user seen before a zext?
+ bool IsSigned; // Was an sext user seen before a zext?
+
+ WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
+ };
+
+ class WideIVVisitor : public IVVisitor {
+ ScalarEvolution *SE;
+ const TargetData *TD;
- WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
+ public:
+ WideIVInfo WI;
+
+ WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
+ const TargetData *TData) :
+ SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
+
+ // Implement the interface used by simplifyUsersOfIV.
+ virtual void visitCast(CastInst *Cast);
};
}
-/// CollectExtend - Update information about the induction variable that is
+/// visitCast - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
-static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
- ScalarEvolution *SE, const TargetData *TD) {
+void WideIVVisitor::visitCast(CastInst *Cast) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
+ return;
+
Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
if (TD && !TD->isLegalInteger(Width))
@@ -845,10 +814,10 @@ class WidenIV {
SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
public:
- WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+ WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
ScalarEvolution *SEv, DominatorTree *DTree,
SmallVectorImpl<WeakVH> &DI) :
- OrigPhi(PN),
+ OrigPhi(WI.NarrowIV),
WideType(WI.WidestNativeType),
IsSigned(WI.IsSigned),
LI(LInfo),
@@ -865,18 +834,42 @@ public:
PHINode *CreateWideIV(SCEVExpander &Rewriter);
protected:
+ Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
+
Instruction *CloneIVUser(NarrowIVDefUse DU);
const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+ const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
+
Instruction *WidenIVUse(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
-static Value *getExtend( Value *NarrowOper, Type *WideType,
- bool IsSigned, IRBuilder<> &Builder) {
+/// isLoopInvariant - Perform a quick domtree based check for loop invariance
+/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
+/// gratuitous for this purpose.
+static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return true;
+
+ return DT->properlyDominates(Inst->getParent(), L->getHeader());
+}
+
+Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use) {
+ // Set the debug location and conservative insertion point.
+ IRBuilder<> Builder(Use);
+ // Hoist the insertion point into loop preheaders as far as possible.
+ for (const Loop *L = LI->getLoopFor(Use->getParent());
+ L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
+ L = L->getParentLoop())
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+
return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
Builder.CreateZExt(NarrowOper, WideType);
}
@@ -901,22 +894,21 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
case Instruction::AShr:
DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
- IRBuilder<> Builder(DU.NarrowUse);
-
// Replace NarrowDef operands with WideDef. Otherwise, we don't know
// anything about the narrow operand yet so must insert a [sz]ext. It is
// probably loop invariant and will be folded or hoisted. If it actually
// comes from a widened IV, it should be removed during a future call to
// WidenIVUse.
Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
- getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, Builder);
+ getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
- getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, Builder);
+ getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
LHS, RHS,
NarrowBO->getName());
+ IRBuilder<> Builder(DU.NarrowUse);
Builder.Insert(WideBO);
if (const OverflowingBinaryOperator *OBO =
dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
@@ -928,45 +920,46 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
llvm_unreachable(0);
}
-/// HoistStep - Attempt to hoist an IV increment above a potential use.
-///
-/// To successfully hoist, two criteria must be met:
-/// - IncV operands dominate InsertPos and
-/// - InsertPos dominates IncV
-///
-/// Meeting the second condition means that we don't need to check all of IncV's
-/// existing uses (it's moving up in the domtree).
-///
-/// This does not yet recursively hoist the operands, although that would
-/// not be difficult.
-static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
- const DominatorTree *DT)
-{
- if (DT->dominates(IncV, InsertPos))
- return true;
+/// No-wrap operations can transfer sign extension of their result to their
+/// operands. Generate the SCEV value for the widened operation without
+/// actually modifying the IR yet. If the expression after extending the
+/// operands is an AddRec for this loop, return it.
+const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+ // Handle the common case of add<nsw/nuw>
+ if (DU.NarrowUse->getOpcode() != Instruction::Add)
+ return 0;
- if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
- return false;
+ // One operand (NarrowDef) has already been extended to WideDef. Now determine
+ // if extending the other will lead to a recurrence.
+ unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+ assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
+
+ const SCEV *ExtendOperExpr = 0;
+ const OverflowingBinaryOperator *OBO =
+ cast<OverflowingBinaryOperator>(DU.NarrowUse);
+ if (IsSigned && OBO->hasNoSignedWrap())
+ ExtendOperExpr = SE->getSignExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else if(!IsSigned && OBO->hasNoUnsignedWrap())
+ ExtendOperExpr = SE->getZeroExtendExpr(
+ SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
+ else
+ return 0;
- if (IncV->mayHaveSideEffects())
- return false;
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
+ IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
- // Attempt to hoist IncV
- for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
- OI != OE; ++OI) {
- Instruction *OInst = dyn_cast<Instruction>(OI);
- if (OInst && !DT->dominates(OInst, InsertPos))
- return false;
- }
- IncV->moveBefore(InsertPos);
- return true;
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+ return AddRec;
}
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
-// perspective after widening it's type? In other words, can the extend be
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
-// on the same loop. If so, return the sign or zero extended
-// recurrence. Otherwise return NULL.
+/// GetWideRecurrence - Is this instruction potentially interesting from
+/// IVUsers' perspective after widening its type? In other words, can the
+/// extend be safely hoisted out of the loop with SCEV reducing the value to a
+/// recurrence on the same loop. If so, return the sign or zero extended
+/// recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
return 0;
@@ -985,7 +978,6 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
return 0;
-
return AddRec;
}
@@ -1039,6 +1031,9 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
// Does this user itself evaluate to a recurrence after widening?
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
if (!WideAddRec) {
+ WideAddRec = GetExtendedOperandRecurrence(DU);
+ }
+ if (!WideAddRec) {
// This user does not evaluate to a recurence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
@@ -1055,9 +1050,9 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = 0;
- if (WideAddRec == WideIncExpr && HoistStep(WideInc, DU.NarrowUse, DT)) {
+ if (WideAddRec == WideIncExpr
+ && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT))
WideUse = WideInc;
- }
else {
WideUse = CloneIVUser(DU);
if (!WideUse)
@@ -1178,183 +1173,17 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//
-void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
- unsigned IVOperIdx = 0;
- ICmpInst::Predicate Pred = ICmp->getPredicate();
- if (IVOperand != ICmp->getOperand(0)) {
- // Swapped
- assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
- IVOperIdx = 1;
- Pred = ICmpInst::getSwappedPredicate(Pred);
- }
-
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
- const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
-
- // Simplify unnecessary loops away.
- const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- // If the condition is always true or always false, replace it with
- // a constant value.
- if (SE->isKnownPredicate(Pred, S, X))
- ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
- else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
- ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
- else
- return;
- DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
- ++NumElimCmp;
- Changed = true;
- DeadInsts.push_back(ICmp);
-}
-
-void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
- Value *IVOperand,
- bool IsSigned) {
- // We're only interested in the case where we know something about
- // the numerator.
- if (IVOperand != Rem->getOperand(0))
- return;
-
- // Get the SCEVs for the ICmp operands.
- const SCEV *S = SE->getSCEV(Rem->getOperand(0));
- const SCEV *X = SE->getSCEV(Rem->getOperand(1));
-
- // Simplify unnecessary loops away.
- const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
- S = SE->getSCEVAtScope(S, ICmpLoop);
- X = SE->getSCEVAtScope(X, ICmpLoop);
-
- // i % n --> i if i is in [0,n).
- if ((!IsSigned || SE->isKnownNonNegative(S)) &&
- SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- S, X))
- Rem->replaceAllUsesWith(Rem->getOperand(0));
- else {
- // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
- const SCEV *LessOne =
- SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
- if (IsSigned && !SE->isKnownNonNegative(LessOne))
- return;
-
- if (!SE->isKnownPredicate(IsSigned ?
- ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- LessOne, X))
- return;
-
- ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
- Rem->getOperand(0), Rem->getOperand(1),
- "tmp");
- SelectInst *Sel =
- SelectInst::Create(ICmp,
- ConstantInt::get(Rem->getType(), 0),
- Rem->getOperand(0), "tmp", Rem);
- Rem->replaceAllUsesWith(Sel);
- }
-
- // Inform IVUsers about the new users.
- if (IU) {
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I);
- }
- DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
- ++NumElimRem;
- Changed = true;
- DeadInsts.push_back(Rem);
-}
-
-/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
-/// no observable side-effect given the range of IV values.
-bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
- Instruction *IVOperand) {
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- return true;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned);
- return true;
- }
- }
-
- // Eliminate any operation that SCEV can prove is an identity function.
- if (!SE->isSCEVable(UseInst->getType()) ||
- (UseInst->getType() != IVOperand->getType()) ||
- (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
- return false;
-
- DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
-
- UseInst->replaceAllUsesWith(IVOperand);
- ++NumElimIdentity;
- Changed = true;
- DeadInsts.push_back(UseInst);
- return true;
-}
-
-/// pushIVUsers - Add all uses of Def to the current IV's worklist.
-///
-static void pushIVUsers(
- Instruction *Def,
- SmallPtrSet<Instruction*,16> &Simplified,
- SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
-
- for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
-
- // Avoid infinite or exponential worklist processing.
- // Also ensure unique worklist users.
- // If Def is a LoopPhi, it may not be in the Simplified set, so check for
- // self edges first.
- if (User != Def && Simplified.insert(User))
- SimpleIVUsers.push_back(std::make_pair(User, Def));
- }
-}
-
-/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
-/// expression in terms of that IV.
-///
-/// This is similar to IVUsers' isInsteresting() but processes each instruction
-/// non-recursively when the operand is already known to be a simpleIVUser.
-///
-static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
- if (!SE->isSCEVable(I->getType()))
- return false;
-
- // Get the symbolic expression for this instruction.
- const SCEV *S = SE->getSCEV(I);
-
- // Only consider affine recurrences.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
- if (AR && AR->getLoop() == L)
- return true;
-
- return false;
-}
-
-/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
-/// of IV users. Each successive simplification may push more users which may
+/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
+/// users. Each successive simplification may push more users which may
/// themselves be candidates for simplification.
///
-/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
-/// simplifies instructions in-place during analysis. Rather than rewriting
-/// induction variables bottom-up from their users, it transforms a chain of
-/// IVUsers top-down, updating the IR only when it encouters a clear
-/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still
-/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
-/// extend elimination.
+/// Sign/Zero extend elimination is interleaved with IV simplification.
///
-/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
-///
-void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
- std::map<PHINode *, WideIVInfo> WideIVMap;
+void IndVarSimplify::SimplifyAndExtend(Loop *L,
+ SCEVExpander &Rewriter,
+ LPPassManager &LPM) {
+ SmallVector<WideIVInfo, 8> WideIVs;
SmallVector<PHINode*, 8> LoopPhis;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
@@ -1370,108 +1199,27 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
// extension. The first time SCEV attempts to normalize sign/zero extension,
// the result becomes final. So for the most predictable results, we delay
// evaluation of sign/zero extend evaluation until needed, and avoid running
- // other SCEV based analysis prior to SimplifyIVUsersNoRewrite.
+ // other SCEV based analysis prior to SimplifyAndExtend.
do {
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- WideIVInfo WI;
-
- // Instructions processed by SimplifyIVUsers for CurrIV.
- SmallPtrSet<Instruction*,16> Simplified;
+ WideIVVisitor WIV(CurrIV, SE, TD);
- // Use-def pairs if IV users waiting to be processed for CurrIV.
- SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+ Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
- // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
- // called multiple times for the same LoopPhi. This is the proper thing to
- // do for loop header phis that use each other.
- pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
-
- while (!SimpleIVUsers.empty()) {
- Instruction *UseInst, *Operand;
- tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
- // Bypass back edges to avoid extra work.
- if (UseInst == CurrIV) continue;
-
- if (EliminateIVUser(UseInst, Operand)) {
- pushIVUsers(Operand, Simplified, SimpleIVUsers);
- continue;
- }
- if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
- bool IsSigned = Cast->getOpcode() == Instruction::SExt;
- if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
- CollectExtend(Cast, IsSigned, WI, SE, TD);
- }
- continue;
- }
- if (isSimpleIVUser(UseInst, L, SE)) {
- pushIVUsers(UseInst, Simplified, SimpleIVUsers);
- }
- }
- if (WI.WidestNativeType) {
- WideIVMap[CurrIV] = WI;
+ if (WIV.WI.WidestNativeType) {
+ WideIVs.push_back(WIV.WI);
}
} while(!LoopPhis.empty());
- for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
- E = WideIVMap.end(); I != E; ++I) {
- WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+ for (; !WideIVs.empty(); WideIVs.pop_back()) {
+ WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
}
}
- WideIVMap.clear();
- }
-}
-
-/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
-/// populate ExprToIVMap for use later.
-///
-void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
- DenseMap<const SCEV *, PHINode *> ExprToIVMap;
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- PHINode *Phi = cast<PHINode>(I);
- if (!SE->isSCEVable(Phi->getType()))
- continue;
-
- const SCEV *S = SE->getSCEV(Phi);
- DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
- bool Inserted;
- tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
- if (Inserted)
- continue;
- PHINode *OrigPhi = Pos->second;
-
- // If one phi derives from the other via GEPs, types may differ.
- if (OrigPhi->getType() != Phi->getType())
- continue;
-
- // Replacing the congruent phi is sufficient because acyclic redundancy
- // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
- // that a phi is congruent, it's almost certain to be the head of an IV
- // user cycle that is isomorphic with the original phi. So it's worth
- // eagerly cleaning up the common case of a single IV increment.
- if (BasicBlock *LatchBlock = L->getLoopLatch()) {
- Instruction *OrigInc =
- cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
- Instruction *IsomorphicInc =
- cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
- if (OrigInc != IsomorphicInc &&
- OrigInc->getType() == IsomorphicInc->getType() &&
- SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
- HoistStep(OrigInc, IsomorphicInc, DT)) {
- DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
- << *IsomorphicInc << '\n');
- IsomorphicInc->replaceAllUsesWith(OrigInc);
- DeadInsts.push_back(IsomorphicInc);
- }
- }
- DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
- ++NumElimIV;
- Phi->replaceAllUsesWith(OrigPhi);
- DeadInsts.push_back(Phi);
}
}
@@ -1479,9 +1227,9 @@ void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-// Check for expressions that ScalarEvolution generates to compute
-// BackedgeTakenInfo. If these expressions have not been reduced, then expanding
-// them may incur additional cost (albeit in the loop preheader).
+/// Check for expressions that ScalarEvolution generates to compute
+/// BackedgeTakenInfo. If these expressions have not been reduced, then
+/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
ScalarEvolution *SE) {
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
@@ -1502,7 +1250,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
}
}
- if (!DisableIVRewrite || ForceLFTR)
+ if (EnableIVRewrite)
return false;
// Recurse past add expressions, which commonly occur in the
@@ -1580,17 +1328,6 @@ static Type *getBackedgeIVType(Loop *L) {
return Ty;
}
-/// isLoopInvariant - Perform a quick domtree based check for loop invariance
-/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
-/// gratuitous for this purpose.
-static bool isLoopInvariant(Value *V, Loop *L, DominatorTree *DT) {
- Instruction *Inst = dyn_cast<Instruction>(V);
- if (!Inst)
- return true;
-
- return DT->properlyDominates(Inst->getParent(), L->getHeader());
-}
-
/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
/// invariant value to the phi.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
@@ -1779,10 +1516,9 @@ LinearFunctionTestReplace(Loop *L,
assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
- // In DisableIVRewrite mode, IndVar is not necessarily a canonical IV. In this
- // mode, LFTR can ignore IV overflow and truncate to the width of
+ // LFTR can ignore IV overflow and truncate to the width of
// BECount. This avoids materializing the add(zext(add)) expression.
- Type *CntTy = DisableIVRewrite ?
+ Type *CntTy = !EnableIVRewrite ?
BackedgeTakenCount->getType() : IndVar->getType();
const SCEV *IVLimit = BackedgeTakenCount;
@@ -1832,7 +1568,7 @@ LinearFunctionTestReplace(Loop *L,
const SCEV *IVInit = AR->getStart();
// For pointer types, sign extend BECount in order to materialize a GEP.
- // Note that for DisableIVRewrite, we never run SCEVExpander on a
+ // Note that without EnableIVRewrite, we never run SCEVExpander on a
// pointer type, because we must preserve the existing GEPs. Instead we
// directly generate a GEP later.
if (IVInit->getType()->isPointerTy()) {
@@ -1919,7 +1655,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) return;
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+ Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
BasicBlock::iterator I = Preheader->getTerminator();
while (I != Preheader->begin()) {
--I;
@@ -1940,6 +1676,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
if (isa<DbgInfoIntrinsic>(I))
continue;
+ // Skip landingpad instructions.
+ if (isa<LandingPadInst>(I))
+ continue;
+
// Don't sink static AllocaInsts out of the entry block, which would
// turn them into dynamic allocas!
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
@@ -2006,7 +1746,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
@@ -2024,6 +1764,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Create a rewriter object which we'll use to transform the code with.
SCEVExpander Rewriter(*SE, "indvars");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
// Eliminate redundant IV users.
//
@@ -2031,9 +1774,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// attempt to avoid evaluating SCEVs for sign/zero extend operations until
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
- if (DisableIVRewrite) {
+ if (!EnableIVRewrite) {
Rewriter.disableCanonicalMode();
- SimplifyIVUsersNoRewrite(L, Rewriter);
+ SimplifyAndExtend(L, Rewriter, LPM);
}
// Check to see if this loop has a computable loop-invariant execution count.
@@ -2046,26 +1789,25 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
RewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV users.
- if (!DisableIVRewrite)
- SimplifyIVUsers(Rewriter);
+ if (EnableIVRewrite)
+ Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts);
// Eliminate redundant IV cycles.
- if (DisableIVRewrite)
- SimplifyCongruentIVs(L);
+ if (!EnableIVRewrite)
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
Type *LargestType = 0;
bool NeedCannIV = false;
- bool ReuseIVForExit = DisableIVRewrite && !ForceLFTR;
bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
- if (ExpandBECount && !ReuseIVForExit) {
+ if (EnableIVRewrite && ExpandBECount) {
// If we have a known trip count and a single exit block, we'll be
// rewriting the loop exit test condition below, which requires a
// canonical induction variable.
NeedCannIV = true;
Type *Ty = BackedgeTakenCount->getType();
- if (DisableIVRewrite) {
+ if (!EnableIVRewrite) {
// In this mode, SimplifyIVUsers may have already widened the IV used by
// the backedge test and inserted a Trunc on the compare's operand. Get
// the wider type to avoid creating a redundant narrow IV only used by the
@@ -2077,7 +1819,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SE->getTypeSizeInBits(LargestType))
LargestType = SE->getEffectiveSCEVType(Ty);
}
- if (!DisableIVRewrite) {
+ if (EnableIVRewrite) {
for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
NeedCannIV = true;
Type *Ty =
@@ -2119,10 +1861,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// the end of the pass.
while (!OldCannIVs.empty()) {
PHINode *OldCannIV = OldCannIVs.pop_back_val();
- OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
+ OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
}
}
- else if (ExpandBECount && ReuseIVForExit && needsLFTR(L, DT)) {
+ else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
}
// If we have a trip count expression, rewrite the loop's exit condition
@@ -2143,7 +1885,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
}
// Rewrite IV-derived expressions.
- if (!DisableIVRewrite)
+ if (EnableIVRewrite)
RewriteIVExpressions(L, Rewriter);
// Clear the rewriter cache, because values that are in the rewriter's cache
@@ -2180,7 +1922,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Verify that LFTR, and any other change have not interfered with SCEV's
// ability to compute trip count.
#ifndef NDEBUG
- if (DisableIVRewrite && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ if (!EnableIVRewrite && VerifyIndvars &&
+ !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
SE->forgetLoop(L);
const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index b500d5b4fd..f410af3275 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -811,8 +811,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
/// important optimization that encourages jump threading, and needs to be run
/// interlaced with other jump threading tasks.
bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
- // Don't hack volatile loads.
- if (LI->isVolatile()) return false;
+ // Don't hack volatile/atomic loads.
+ if (!LI->isSimple()) return false;
// If the load is defined in a block with exactly one predecessor, it can't be
// partially redundant.
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 66add6ca01..b79bb1300f 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -151,6 +151,11 @@ namespace {
///
bool isSafeToExecuteUnconditionally(Instruction &I);
+ /// isGuaranteedToExecute - Check that the instruction is guaranteed to
+ /// execute.
+ ///
+ bool isGuaranteedToExecute(Instruction &I);
+
/// pointerInvalidatedByLoop - Return true if the body of this loop may
/// store into the memory location pointed to by V.
///
@@ -357,8 +362,8 @@ void LICM::HoistRegion(DomTreeNode *N) {
bool LICM::canSinkOrHoistInst(Instruction &I) {
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- if (LI->isVolatile())
- return false; // Don't hoist volatile loads!
+ if (!LI->isUnordered())
+ return false; // Don't hoist volatile/atomic loads!
// Loads from constant memory are always safe to move, even if they end up
// in the same alias set as something that ends up being modified.
@@ -461,7 +466,7 @@ void LICM::sink(Instruction &I) {
} else {
// Move the instruction to the start of the exit block, after any PHI
// nodes in it.
- I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+ I.moveBefore(ExitBlocks[0]->getFirstInsertionPt());
// This instruction is no longer in the AST for the current loop, because
// we just sunk it out of the loop. If we just sunk it into an outer
@@ -504,7 +509,7 @@ void LICM::sink(Instruction &I) {
continue;
// Insert the code after the last PHI node.
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
// If this is the first exit block processed, just move the original
// instruction, otherwise clone the original instruction and insert
@@ -577,6 +582,10 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
if (Inst.isSafeToSpeculativelyExecute())
return true;
+ return isGuaranteedToExecute(Inst);
+}
+
+bool LICM::isGuaranteedToExecute(Instruction &Inst) {
// Otherwise we have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
// which does not execute this instruction, so we can't hoist it.
@@ -635,7 +644,7 @@ namespace {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ Instruction *InsertPos = ExitBlock->getFirstInsertionPt();
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
@@ -713,34 +722,41 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// If there is an non-load/store instruction in the loop, we can't promote
// it.
- unsigned InstAlignment;
if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
- assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
- InstAlignment = load->getAlignment();
+ assert(!load->isVolatile() && "AST broken");
+ if (!load->isSimple())
+ return;
} else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (Use->getOperand(1) != ASIV)
continue;
- InstAlignment = store->getAlignment();
- assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ assert(!store->isVolatile() && "AST broken");
+ if (!store->isSimple())
+ return;
+
+ // Note that we only check GuaranteedToExecute inside the store case
+ // so that we do not introduce stores where they did not exist before
+ // (which would break the LLVM concurrency model).
+
+ // If the alignment of this instruction allows us to specify a more
+ // restrictive (and performant) alignment and if we are sure this
+ // instruction will be executed, update the alignment.
+ // Larger is better, with the exception of 0 being the best alignment.
+ unsigned InstAlignment = store->getAlignment();
+ if ((InstAlignment > Alignment || InstAlignment == 0)
+ && (Alignment != 0))
+ if (isGuaranteedToExecute(*Use)) {
+ GuaranteedToExecute = true;
+ Alignment = InstAlignment;
+ }
+
+ if (!GuaranteedToExecute)
+ GuaranteedToExecute = isGuaranteedToExecute(*Use);
+
} else
return; // Not a load or store.
- // If the alignment of this instruction allows us to specify a more
- // restrictive (and performant) alignment and if we are sure this
- // instruction will be executed, update the alignment.
- // Larger is better, with the exception of 0 being the best alignment.
- if ((InstAlignment > Alignment || InstAlignment == 0)
- && (Alignment != 0))
- if (isSafeToExecuteUnconditionally(*Use)) {
- GuaranteedToExecute = true;
- Alignment = InstAlignment;
- }
-
- if (!GuaranteedToExecute)
- GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
-
LoopUses.push_back(Use);
}
}
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ea4c515f41..ad15cbb9b4 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -267,7 +267,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
- if (SI->isVolatile()) return false;
+ if (!SI->isSimple()) return false;
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
@@ -314,7 +314,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
const SCEVAddRecExpr *LoadEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
- StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+ StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
return true;
}
@@ -463,7 +463,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
SplatValue = 0;
} else {
// Otherwise, this isn't an idiom we can transform. For example, we can't
- // do anything with a 3-byte store, for example.
+ // do anything with a 3-byte store.
return false;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e90b5bcacd..3e122c2a86 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -70,12 +70,27 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
+namespace llvm {
+cl::opt<bool> EnableNested(
+ "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
+
+cl::opt<bool> EnableRetry(
+ "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
+
+// Temporary flag to cleanup congruent phis after LSR phi expansion.
+// It's currently disabled until we can determine whether it's truly useful or
+// not. The flag should be removed after the v3.0 release.
+cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
+}
+
namespace {
/// RegSortData - This class holds data which is used to order reuse candidates.
@@ -670,6 +685,21 @@ public:
void Loose();
+#ifndef NDEBUG
+ // Once any of the metrics loses, they must all remain losers.
+ bool isValid() {
+ return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
+ | ImmCost | SetupCost) != ~0u)
+ || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
+ & ImmCost & SetupCost) == ~0u);
+ }
+#endif
+
+ bool isLoser() {
+ assert(isValid() && "invalid cost");
+ return NumRegs == ~0u;
+ }
+
void RateFormula(const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
@@ -702,34 +732,48 @@ void Cost::RateRegister(const SCEV *Reg,
if (AR->getLoop() == L)
AddRecCost += 1; /// TODO: This should be a function of the stride.
- // If this is an addrec for a loop that's already been visited by LSR,
- // don't second-guess its addrec phi nodes. LSR isn't currently smart
- // enough to reason about more than one loop at a time. Consider these
- // registers free and leave them alone.
- else if (L->contains(AR->getLoop()) ||
+ // If this is an addrec for another loop, don't second-guess its addrec phi
+ // nodes. LSR isn't currently smart enough to reason about more than one
+ // loop at a time. LSR has either already run on inner loops, will not run
+ // on other loops, and cannot be expected to change sibling loops. If the
+ // AddRec exists, consider its register free and leave it alone. Otherwise,
+ // do not consider this formula at all.
+ // FIXME: why do we need to generate such formulae?
+ else if (!EnableNested || L->contains(AR->getLoop()) ||
(!AR->getLoop()->contains(L) &&
DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (SE.isSCEVable(PN->getType()) &&
(SE.getEffectiveSCEVType(PN->getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
SE.getSCEV(PN) == AR)
return;
-
+ }
+ if (!EnableNested) {
+ Loose();
+ return;
+ }
// If this isn't one of the addrecs that the loop already has, it
// would require a costly new phi and add. TODO: This isn't
// precisely modeled right now.
++NumBaseAdds;
- if (!Regs.count(AR->getStart()))
+ if (!Regs.count(AR->getStart())) {
RateRegister(AR->getStart(), Regs, L, SE, DT);
+ if (isLoser())
+ return;
+ }
}
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
- if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1)))
- if (!Regs.count(AR->getStart()))
+ if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
+ if (!Regs.count(AR->getOperand(1))) {
RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+ if (isLoser())
+ return;
+ }
+ }
}
++NumRegs;
@@ -769,6 +813,8 @@ void Cost::RateFormula(const Formula &F,
return;
}
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ if (isLoser())
+ return;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
@@ -778,6 +824,8 @@ void Cost::RateFormula(const Formula &F,
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ if (isLoser())
+ return;
}
// Determine how many (unfolded) adds we'll need inside the loop.
@@ -795,6 +843,7 @@ void Cost::RateFormula(const Formula &F,
else if (Offset != 0)
ImmCost += APInt(64, Offset, true).getMinSignedBits();
}
+ assert(isValid() && "invalid cost");
}
/// Loose - Set this cost to a losing value.
@@ -1156,7 +1205,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs);
+ if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);
return false;
}
@@ -1427,6 +1476,7 @@ void LSRInstance::OptimizeShadowIV() {
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
Type *DestTy = NULL;
+ bool IsSigned = false;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
@@ -1440,10 +1490,14 @@ void LSRInstance::OptimizeShadowIV() {
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
- if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = false;
DestTy = UCast->getDestTy();
- else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+ }
+ else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
+ IsSigned = true;
DestTy = SCast->getDestTy();
+ }
if (!DestTy) continue;
if (TLI) {
@@ -1474,7 +1528,9 @@ void LSRInstance::OptimizeShadowIV() {
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
- Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+ Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
+ (double)Init->getSExtValue() :
+ (double)Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
@@ -3275,6 +3331,9 @@ retry:
skip:;
}
+ if (!EnableRetry && !AnySatisfiedReqRegs)
+ return;
+
// If none of the formulae had all of the required registers, relax the
// constraint so that we don't exclude all formulae.
if (!AnySatisfiedReqRegs) {
@@ -3298,6 +3357,10 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
// SolveRecurse does all the work.
SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
CurRegs, VisitedRegs);
+ if (Solution.empty()) {
+ DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
+ return;
+ }
// Ok, we've now made all our decisions.
DEBUG(dbgs() << "\n"
@@ -3416,6 +3479,9 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
+ // Ignore landingpad instructions.
+ while (isa<LandingPadInst>(IP)) ++IP;
+
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
@@ -3527,7 +3593,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// The other interesting way of "folding" with an ICmpZero is to use a
// negated immediate.
if (!ICmpScaledV)
- ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+ ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
else {
Ops.push_back(SE.getUnknown(ICmpScaledV));
ICmpScaledV = ConstantInt::get(IntTy, Offset);
@@ -3611,10 +3677,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
!isa<IndirectBrInst>(BB->getTerminator())) {
- Loop *PNLoop = LI.getLoopFor(PN->getParent());
- if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ BasicBlock *Parent = PN->getParent();
+ Loop *PNLoop = LI.getLoopFor(Parent);
+ if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+ BasicBlock *NewBB = 0;
+ if (!Parent->isLandingPad()) {
+ NewBB = SplitCriticalEdge(BB, Parent, P,
+ /*MergeIdenticalEdges=*/true,
+ /*DontDeleteUselessPhis=*/true);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
+ NewBB = NewBBs[0];
+ }
// If PN is outside of the loop and BB is in the loop, we want to
// move the block to be immediately before the PHI block, not
@@ -3700,6 +3776,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
SCEVExpander Rewriter(SE, "lsr");
Rewriter.disableCanonicalMode();
+ Rewriter.enableLSRMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Expand the new value definitions and update the users.
@@ -3740,6 +3817,23 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
OptimizeShadowIV();
OptimizeLoopTermCond();
+ // If loop preparation eliminates all interesting IV users, bail.
+ if (IU.empty()) return;
+
+ // Skip nested loops until we can model them better with formulae.
+ if (!EnableNested && !L->empty()) {
+
+ if (EnablePhiElim) {
+ // Remove any extra phis created by processing inner loops.
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(SE, "lsr");
+ Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+ }
+ DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
+ return;
+ }
+
// Start collecting data and preparing for the solver.
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
@@ -3763,6 +3857,9 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
Types.clear();
RegUses.clear();
+ if (Solution.empty())
+ return;
+
#ifndef NDEBUG
// Formulae should be legal.
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
@@ -3778,6 +3875,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// Now that we've decided what we want, make it so.
ImplementSolution(Solution, P);
+
+ if (EnablePhiElim) {
+ // Remove any extra phis created by processing inner loops.
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(SE, "lsr");
+ Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
+ Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+ }
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index fef6bc31c7..91395b2af6 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Target/TargetData.h"
#include <climits>
using namespace llvm;
@@ -39,6 +40,11 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
+// Temporary flag to be removed in 3.0
+static cl::opt<bool>
+NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden,
+ cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+
namespace {
class LoopUnroll : public LoopPass {
public:
@@ -49,7 +55,7 @@ namespace {
CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
-
+
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -57,11 +63,11 @@ namespace {
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
-
+
// Threshold to use when optsize is specified (and there is no
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
-
+
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
@@ -79,6 +85,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
+ AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
@@ -101,45 +108,62 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
}
/// ApproximateLoopSize - Approximate the size of the loop.
-static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
+static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
+ const TargetData *TD) {
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I);
+ Metrics.analyzeBasicBlock(*I, TD);
NumCalls = Metrics.NumInlineCandidates;
-
+
unsigned LoopSize = Metrics.NumInsts;
-
+
// Don't allow an estimate of size zero. This would allows unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
// not a problem for code quality.
if (LoopSize == 0) LoopSize = 1;
-
+
return LoopSize;
}
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
-
+
// Determine the current unrolling threshold. While this is normally set
// from UnrollThreshold, it is overridden to a smaller value if the current
// function is marked as optimize-for-size, and the unroll threshold was
// not user specified.
unsigned Threshold = CurrentThreshold;
- if (!UserThreshold &&
+ if (!UserThreshold &&
Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
Threshold = OptSizeUnrollThreshold;
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
- unsigned Count = CurrentCount;
-
+ // Find trip count and trip multiple if count is not available
+ unsigned TripCount = 0;
+ unsigned TripMultiple = 1;
+ if (!NoSCEVUnroll) {
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
+ }
+ }
+ else {
+ TripCount = L->getSmallConstantTripCount();
+ if (TripCount == 0)
+ TripMultiple = L->getSmallConstantTripMultiple();
+ }
// Automatically select an unroll count.
+ unsigned Count = CurrentCount;
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
@@ -152,8 +176,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Enforce the threshold.
if (Threshold != NoThreshold) {
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
unsigned NumInlineCandidates;
- unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (NumInlineCandidates != 0) {
DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
@@ -182,12 +207,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- Function *F = L->getHeader()->getParent();
- if (!UnrollLoop(L, Count, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
return false;
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
- DT->runOnFunction(*F);
return true;
}
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 840c4b69cf..458949c844 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -492,7 +492,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
Value *BranchVal = LIC;
if (!isa<ConstantInt>(Val) ||
Val->getType() != Type::getInt1Ty(LIC->getContext()))
- BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp");
+ BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val);
else if (Val != ConstantInt::getTrue(Val->getContext()))
// We want to enter the new loop when the condition is true.
std::swap(TrueDest, FalseDest);
@@ -561,10 +561,17 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
BasicBlock *ExitBlock = ExitBlocks[i];
SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
pred_end(ExitBlock));
+
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
- ".us-lcssa", this);
+ if (!ExitBlock->isLandingPad()) {
+ SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
+ ".us-lcssa", this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
+ this, NewBBs);
+ }
}
}
@@ -632,7 +639,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// as well.
ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
}
-
+
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
// The new exit block should be in the same loop as the old one.
@@ -653,6 +660,19 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
}
+
+ if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
+ PN = PHINode::Create(LPad->getType(), 0, "",
+ ExitSucc->getFirstInsertionPt());
+
+ for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+ LandingPadInst *LPI = BB->getLandingPadInst();
+ LPI->replaceAllUsesWith(PN);
+ PN->addIncoming(LPI, BB);
+ }
+ }
}
// Rewrite the code to refer to itself.
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 9087b46c13..689bbe9b03 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -20,98 +20,88 @@
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
-static bool LowerAtomicIntrinsic(IntrinsicInst *II) {
- IRBuilder<> Builder(II->getParent(), II);
- unsigned IID = II->getIntrinsicID();
- switch (IID) {
- case Intrinsic::memory_barrier:
- break;
+static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
+ IRBuilder<> Builder(CXI->getParent(), CXI);
+ Value *Ptr = CXI->getPointerOperand();
+ Value *Cmp = CXI->getCompareOperand();
+ Value *Val = CXI->getNewValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ CXI->replaceAllUsesWith(Orig);
+ CXI->eraseFromParent();
+ return true;
+}
- case Intrinsic::atomic_load_add:
- case Intrinsic::atomic_load_sub:
- case Intrinsic::atomic_load_and:
- case Intrinsic::atomic_load_nand:
- case Intrinsic::atomic_load_or:
- case Intrinsic::atomic_load_xor:
- case Intrinsic::atomic_load_max:
- case Intrinsic::atomic_load_min:
- case Intrinsic::atomic_load_umax:
- case Intrinsic::atomic_load_umin: {
- Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1);
+static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+ IRBuilder<> Builder(RMWI->getParent(), RMWI);
+ Value *Ptr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
- LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Res = NULL;
- switch (IID) {
- default: assert(0 && "Unrecognized atomic modify operation");
- case Intrinsic::atomic_load_add:
- Res = Builder.CreateAdd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_sub:
- Res = Builder.CreateSub(Orig, Delta);
- break;
- case Intrinsic::atomic_load_and:
- Res = Builder.CreateAnd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_nand:
- Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
- break;
- case Intrinsic::atomic_load_or:
- Res = Builder.CreateOr(Orig, Delta);
- break;
- case Intrinsic::atomic_load_xor:
- Res = Builder.CreateXor(Orig, Delta);
- break;
- case Intrinsic::atomic_load_max:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Delta, Orig);
- break;
- case Intrinsic::atomic_load_min:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Orig, Delta);
- break;
- case Intrinsic::atomic_load_umax:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Delta, Orig);
- break;
- case Intrinsic::atomic_load_umin:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Orig, Delta);
- break;
- }
- Builder.CreateStore(Res, Ptr);
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
- II->replaceAllUsesWith(Orig);
+ switch (RMWI->getOperation()) {
+ default: llvm_unreachable("Unexpected RMW operation");
+ case AtomicRMWInst::Xchg:
+ Res = Val;
break;
- }
-
- case Intrinsic::atomic_swap: {
- Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1);
- LoadInst *Orig = Builder.CreateLoad(Ptr);
- Builder.CreateStore(Val, Ptr);
- II->replaceAllUsesWith(Orig);
+ case AtomicRMWInst::Add:
+ Res = Builder.CreateAdd(Orig, Val);
break;
- }
-
- case Intrinsic::atomic_cmp_swap: {
- Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1);
- Value *Val = II->getArgOperand(2);
-
- LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
- Value *Res = Builder.CreateSelect(Equal, Val, Orig);
- Builder.CreateStore(Res, Ptr);
- II->replaceAllUsesWith(Orig);
+ case AtomicRMWInst::Sub:
+ Res = Builder.CreateSub(Orig, Val);
+ break;
+ case AtomicRMWInst::And:
+ Res = Builder.CreateAnd(Orig, Val);
+ break;
+ case AtomicRMWInst::Nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
+ break;
+ case AtomicRMWInst::Or:
+ Res = Builder.CreateOr(Orig, Val);
+ break;
+ case AtomicRMWInst::Xor:
+ Res = Builder.CreateXor(Orig, Val);
+ break;
+ case AtomicRMWInst::Max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+ Val, Orig);
+ break;
+ case AtomicRMWInst::Min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
+ Orig, Val);
+ break;
+ case AtomicRMWInst::UMax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+ Val, Orig);
+ break;
+ case AtomicRMWInst::UMin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
+ Orig, Val);
break;
}
+ Builder.CreateStore(Res, Ptr);
+ RMWI->replaceAllUsesWith(Orig);
+ RMWI->eraseFromParent();
+ return true;
+}
- default:
- return false;
- }
+static bool LowerFenceInst(FenceInst *FI) {
+ FI->eraseFromParent();
+ return true;
+}
- assert(II->use_empty() &&
- "Lowering should have eliminated any uses of the intrinsic call!");
- II->eraseFromParent();
+static bool LowerLoadInst(LoadInst *LI) {
+ LI->setAtomic(NotAtomic);
+ return true;
+}
+static bool LowerStoreInst(StoreInst *SI) {
+ SI->setAtomic(NotAtomic);
return true;
}
@@ -123,9 +113,22 @@ namespace {
}
bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
- for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; )
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++))
- Changed |= LowerAtomicIntrinsic(II);
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
+ Instruction *Inst = DI++; // Advance first: the lowering helpers may erase Inst.
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ Changed |= LowerFenceInst(FI);
+ else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ Changed |= LowerAtomicCmpXchgInst(CXI);
+ else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst))
+ Changed |= LowerAtomicRMWInst(RMWI);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isAtomic())
+ Changed |= LowerLoadInst(LI); // setAtomic(NotAtomic) modifies the IR; report it.
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isAtomic())
+ Changed |= LowerStoreInst(SI); // setAtomic(NotAtomic) modifies the IR; report it.
+ }
+ }
return Changed;
}
};
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index ba5ee68ebb..298d692749 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -384,7 +384,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
- if (NextStore->isVolatile()) break;
+ if (!NextStore->isSimple()) break;
// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
@@ -479,7 +479,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
- if (SI->isVolatile()) return false;
+ if (!SI->isSimple()) return false;
if (TD == 0) return false;
@@ -487,7 +487,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
- if (!LI->isVolatile() && LI->hasOneUse() &&
+ if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
CallInst *C = 0;
@@ -806,21 +806,26 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
MemDepResult DepInfo = MD->getDependency(M);
- if (!DepInfo.isClobber())
- return false;
-
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
- return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
-
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), C)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
- return true;
+ if (DepInfo.isClobber()) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
}
}
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::Location SrcLoc = AA.getLocationForSource(M);
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
+ M, M->getParent());
+ if (SrcDepInfo.isClobber()) {
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+ }
+
return false;
}
@@ -860,7 +865,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
- Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+ Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
MemDepResult DepInfo =
MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index f2e5ff9ac9..80f5f014c8 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -344,6 +344,10 @@ static InstructionClass GetInstructionClass(const Value *V) {
break;
default:
// For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
if (IsPotentialUse(*OI))
@@ -421,9 +425,10 @@ static bool IsAlwaysTail(InstructionClass Class) {
/// IsNoThrow - Test if the given class represents instructions which are always
/// safe to mark with the nounwind attribute..
static bool IsNoThrow(InstructionClass Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
return Class == IC_Retain ||
Class == IC_RetainRV ||
- Class == IC_RetainBlock ||
Class == IC_Release ||
Class == IC_Autorelease ||
Class == IC_AutoreleaseRV ||
@@ -515,6 +520,10 @@ static bool IsObjCIdentifiedObject(const Value *V) {
const Value *Pointer =
StripPointerCastsAndObjCCalls(LI->getPointerOperand());
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (GV->isConstant())
+ return true;
StringRef Name = GV->getName();
// These special variables are known to hold values which are not
// reference-counted pointers.
@@ -738,7 +747,6 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
switch (GetBasicInstructionClass(CS.getInstruction())) {
case IC_Retain:
case IC_RetainRV:
- case IC_RetainBlock:
case IC_Autorelease:
case IC_AutoreleaseRV:
case IC_NoopCast:
@@ -746,6 +754,8 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
case IC_FusedRetainAutorelease:
case IC_FusedRetainAutoreleaseRV:
// These functions don't access any memory visible to the compiler.
+ // Note that this doesn't include objc_retainBlock, because it updates
+ // pointers when it copies block data.
return NoModRef;
default:
break;
@@ -877,7 +887,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
// usually can't sink them past other calls, which would be the main
// case where it would be useful.
-/// TODO: The pointer returned from objc_loadWeakRetained is retained.
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+// TODO: Delete release+retain pairs (rare).
#include "llvm/GlobalAlias.h"
#include "llvm/Constants.h"
@@ -1098,16 +1110,16 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
if (A == S_None || B == S_None)
return S_None;
- // Note that we can't merge S_CanRelease and S_Use.
if (A > B) std::swap(A, B);
if (TopDown) {
// Choose the side which is further along in the sequence.
- if (A == S_Retain && (B == S_CanRelease || B == S_Use))
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
return B;
} else {
// Choose the side which is further along in the sequence.
if ((A == S_Use || A == S_CanRelease) &&
- (B == S_Release || B == S_Stop || B == S_MovableRelease))
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
return A;
// If both sides are releases, choose the more conservative one.
if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
@@ -1124,22 +1136,37 @@ namespace {
/// retain-decrement-use-release sequence or release-use-decrement-retain
/// reverese sequence.
struct RRInfo {
- /// KnownIncremented - After an objc_retain, the reference count of the
- /// referenced object is known to be positive. Similarly, before an
- /// objc_release, the reference count of the referenced object is known to
- /// be positive. If there are retain-release pairs in code regions where the
- /// retain count is known to be positive, they can be eliminated, regardless
- /// of any side effects between them.
- bool KnownIncremented;
+ /// KnownSafe - After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the known same pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
+ bool KnownSafe;
/// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
/// opposed to objc_retain calls).
bool IsRetainBlock;
+ /// CopyOnEscape - True if the Calls are objc_retainBlock calls
+ /// which all have the !clang.arc.copy_on_escape metadata.
+ bool CopyOnEscape;
+
/// IsTailCallRelease - True of the objc_release calls are all marked
/// with the "tail" keyword.
bool IsTailCallRelease;
+ /// Partial - True if we've seen an opportunity for partial RR elimination,
+ /// such as pushing calls into a CFG triangle or into one side of a
+ /// CFG diamond.
+ bool Partial;
+
/// ReleaseMetadata - If the Calls are objc_release calls and they all have
/// a clang.imprecise_release tag, this is the metadata tag.
MDNode *ReleaseMetadata;
@@ -1153,7 +1180,8 @@ namespace {
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
RRInfo() :
- KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+ KnownSafe(false), IsRetainBlock(false), CopyOnEscape(false),
+ IsTailCallRelease(false), Partial(false),
ReleaseMetadata(0) {}
void clear();
@@ -1161,9 +1189,11 @@ namespace {
}
void RRInfo::clear() {
- KnownIncremented = false;
+ KnownSafe = false;
IsRetainBlock = false;
+ CopyOnEscape = false;
IsTailCallRelease = false;
+ Partial = false;
ReleaseMetadata = 0;
Calls.clear();
ReverseInsertPts.clear();
@@ -1176,6 +1206,9 @@ namespace {
/// RefCount - The known minimum number of reference count increments.
unsigned RefCount;
+ /// NestCount - The known minimum level of retain+release nesting.
+ unsigned NestCount;
+
/// Seq - The current position in the sequence.
Sequence Seq;
@@ -1184,7 +1217,11 @@ namespace {
/// TODO: Encapsulate this better.
RRInfo RRI;
- PtrState() : RefCount(0), Seq(S_None) {}
+ PtrState() : RefCount(0), NestCount(0), Seq(S_None) {}
+
+ void SetAtLeastOneRefCount() {
+ if (RefCount == 0) RefCount = 1;
+ }
void IncrementRefCount() {
if (RefCount != UINT_MAX) ++RefCount;
@@ -1194,14 +1231,22 @@ namespace {
if (RefCount != 0) --RefCount;
}
- void ClearRefCount() {
- RefCount = 0;
- }
-
bool IsKnownIncremented() const {
return RefCount > 0;
}
+ void IncrementNestCount() {
+ if (NestCount != UINT_MAX) ++NestCount;
+ }
+
+ void DecrementNestCount() {
+ if (NestCount != 0) --NestCount;
+ }
+
+ bool IsKnownNested() const {
+ return NestCount > 0;
+ }
+
void SetSeq(Sequence NewSeq) {
Seq = NewSeq;
}
@@ -1233,23 +1278,40 @@ void
PtrState::Merge(const PtrState &Other, bool TopDown) {
Seq = MergeSeqs(Seq, Other.Seq, TopDown);
RefCount = std::min(RefCount, Other.RefCount);
+ NestCount = std::min(NestCount, Other.NestCount);
// We can't merge a plain objc_retain with an objc_retainBlock.
if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
Seq = S_None;
+ // If we're not in a sequence (anymore), drop all associated state.
if (Seq == S_None) {
RRI.clear();
+ } else if (RRI.Partial || Other.RRI.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+ // RR elimination. If the branch predicates for the two merges differ,
+ // mixing them is unsafe.
+ Seq = S_None;
+ RRI.clear();
} else {
// Conservatively merge the ReleaseMetadata information.
if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
RRI.ReleaseMetadata = 0;
- RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented;
+ RRI.CopyOnEscape = RRI.CopyOnEscape && Other.RRI.CopyOnEscape;
+ RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
- RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
- Other.RRI.ReverseInsertPts.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ RRI.Partial = RRI.ReverseInsertPts.size() !=
+ Other.RRI.ReverseInsertPts.size();
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ I = Other.RRI.ReverseInsertPts.begin(),
+ E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
+ RRI.Partial |= RRI.ReverseInsertPts.insert(*I);
}
}
@@ -1316,7 +1378,7 @@ namespace {
}
void clearBottomUpPointers() {
- PerPtrTopDown.clear();
+ PerPtrBottomUp.clear();
}
void clearTopDownPointers() {
@@ -1334,6 +1396,12 @@ namespace {
unsigned GetAllPathCount() const {
return TopDownPathCount * BottomUpPathCount;
}
+
+ /// isVisitedTopDown - Test whether the block for this BBState has been
+ /// visited by the top-down portion of the algorithm.
+ bool isVisitedTopDown() const {
+ return TopDownPathCount != 0;
+ }
};
}
@@ -1364,7 +1432,7 @@ void BBState::MergePred(const BBState &Other) {
/*TopDown=*/true);
}
- // For each entry in our set, if the other set doens't have an entry with the
+ // For each entry in our set, if the other set doesn't have an entry with the
// same key, force it to merge with an empty entry.
for (ptr_iterator MI = top_down_ptr_begin(),
ME = top_down_ptr_end(); MI != ME; ++MI)
@@ -1389,7 +1457,7 @@ void BBState::MergeSucc(const BBState &Other) {
/*TopDown=*/false);
}
- // For each entry in our set, if the other set doens't have an entry
+ // For each entry in our set, if the other set doesn't have an entry
// with the same key, force it to merge with an empty entry.
for (ptr_iterator MI = bottom_up_ptr_begin(),
ME = bottom_up_ptr_end(); MI != ME; ++MI)
@@ -1406,15 +1474,11 @@ namespace {
/// Run - A flag indicating whether this optimization pass should run.
bool Run;
- /// RetainFunc, RelaseFunc - Declarations for objc_retain,
- /// objc_retainBlock, and objc_release.
- Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
-
/// RetainRVCallee, etc. - Declarations for ObjC runtime
/// functions, for use in creating calls to them. These are initialized
/// lazily to avoid cluttering up the Module with unused declarations.
Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
- *RetainCallee, *AutoreleaseCallee;
+ *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee;
/// UsedInThisFunciton - Flags which determine whether each of the
/// interesting runtine functions is in fact used in the current function.
@@ -1424,10 +1488,15 @@ namespace {
/// metadata.
unsigned ImpreciseReleaseMDKind;
+ /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
+ /// metadata.
+ unsigned CopyOnEscapeMDKind;
+
Constant *getRetainRVCallee(Module *M);
Constant *getAutoreleaseRVCallee(Module *M);
Constant *getReleaseCallee(Module *M);
Constant *getRetainCallee(Module *M);
+ Constant *getRetainBlockCallee(Module *M);
Constant *getAutoreleaseCallee(Module *M);
void OptimizeRetainCall(Function &F, Instruction *Retain);
@@ -1452,11 +1521,13 @@ namespace {
void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts);
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M);
bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases);
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M);
void OptimizeWeakCalls(Function &F);
@@ -1561,6 +1632,23 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
return RetainCallee;
}
+Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
+ if (!RetainBlockCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ RetainBlockCallee =
+ M->getOrInsertFunction(
+ "objc_retainBlock",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return RetainBlockCallee;
+}
+
Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
if (!AutoreleaseCallee) {
LLVMContext &C = M->getContext();
@@ -1904,12 +1992,19 @@ void
ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
// Check for a return of the pointer value.
const Value *Ptr = GetObjCArg(AutoreleaseRV);
- for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
- UI != UE; ++UI) {
- const User *I = *UI;
- if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
- return;
- }
+ SmallVector<const Value *, 2> Users;
+ Users.push_back(Ptr);
+ do {
+ Ptr = Users.pop_back_val();
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return;
+ if (isa<BitCastInst>(I))
+ Users.push_back(I);
+ }
+ } while (!Users.empty());
Changed = true;
++NumPeeps;
@@ -2132,41 +2227,49 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
- switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
+ switch (SuccS.GetSeq()) {
case S_None:
- case S_CanRelease:
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
- SomeSuccHasSame = false;
- break;
+ case S_CanRelease: {
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ S.ClearSequenceProgress();
+ continue;
+ }
case S_Use:
SomeSuccHasSame = true;
break;
case S_Stop:
case S_Release:
case S_MovableRelease:
- AllSuccsHaveSame = false;
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ AllSuccsHaveSame = false;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
+ }
// If the state at the other end of any of the successor edges
// matches the current state, require all edges to match. This
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ S.ClearSequenceProgress();
}
case S_CanRelease: {
const Value *Arg = I->first;
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
- switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
- case S_None:
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
- SomeSuccHasSame = false;
- break;
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
+ switch (SuccS.GetSeq()) {
+ case S_None: {
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ S.ClearSequenceProgress();
+ continue;
+ }
case S_CanRelease:
SomeSuccHasSame = true;
break;
@@ -2174,16 +2277,18 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
case S_Release:
case S_MovableRelease:
case S_Use:
- AllSuccsHaveSame = false;
+ if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ AllSuccsHaveSame = false;
break;
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
+ }
// If the state at the other end of any of the successor edges
// matches the current state, require all edges to match. This
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
- MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ S.ClearSequenceProgress();
}
}
}
@@ -2207,6 +2312,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (Succ == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ // If we haven't seen this node yet, then we've found a CFG cycle.
+ // Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
if (I == BBStates.end())
continue;
MyStates.InitFromSucc(I->second);
@@ -2245,11 +2352,12 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
S.RRI.clear();
- S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
S.IncrementRefCount();
+ S.IncrementNestCount();
break;
}
case IC_RetainBlock:
@@ -2259,6 +2367,16 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
PtrState &S = MyStates.getPtrBottomUpState(Arg);
S.DecrementRefCount();
+ S.SetAtLeastOneRefCount();
+ S.DecrementNestCount();
+
+ // A non-copy-on-escape objc_retainBlock call with just a use still
+ // needs to be kept, because it may be copying a block from the stack
+ // to the heap.
+ if (Class == IC_RetainBlock &&
+ !Inst->getMetadata(CopyOnEscapeMDKind) &&
+ S.GetSeq() == S_Use)
+ S.SetSeq(S_CanRelease);
switch (S.GetSeq()) {
case S_Stop:
@@ -2272,6 +2390,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
// better to let it remain as the first instruction after a call.
if (Class != IC_RetainRV) {
S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ if (S.RRI.IsRetainBlock)
+ S.RRI.CopyOnEscape = !!Inst->getMetadata(CopyOnEscapeMDKind);
Retains[Inst] = S.RRI;
}
S.ClearSequenceProgress();
@@ -2281,7 +2401,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
- break;
+ continue;
}
case IC_AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
@@ -2305,26 +2425,22 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
PtrState &S = MI->second;
Sequence Seq = S.GetSeq();
- // Check for possible retains and releases.
+ // Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- // Check for a retain (we're going bottom-up here).
S.DecrementRefCount();
-
- // Check for a release.
- if (!IsRetain(Class) && Class != IC_RetainBlock)
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
}
// Check for possible direct uses.
@@ -2332,14 +2448,14 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
case S_Release:
case S_MovableRelease:
if (CanUse(Inst, Ptr, PA, Class)) {
- S.RRI.ReverseInsertPts.clear();
+ assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
S.SetSeq(S_Use);
} else if (Seq == S_Release &&
(Class == IC_User || Class == IC_CallOrUser)) {
// Non-movable releases depend on any possible objc pointer use.
S.SetSeq(S_Stop);
- S.RRI.ReverseInsertPts.clear();
+ assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
}
break;
@@ -2378,14 +2494,18 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (Pred == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- if (I == BBStates.end())
+ assert(I != BBStates.end());
+ // If we haven't seen this node yet, then we've found a CFG cycle.
+ // Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
+ if (!I->second.isVisitedTopDown())
continue;
MyStates.InitFromPred(I->second);
while (PI != PE) {
Pred = *PI++;
if (Pred != BB) {
I = BBStates.find(Pred);
- if (I != BBStates.end())
+ assert(I != BBStates.end());
+ if (I->second.isVisitedTopDown())
MyStates.MergePred(I->second);
}
}
@@ -2422,18 +2542,25 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
S.SetSeq(S_Retain);
S.RRI.clear();
S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- S.RRI.KnownIncremented = S.IsKnownIncremented();
+ if (S.RRI.IsRetainBlock)
+ S.RRI.CopyOnEscape = !!Inst->getMetadata(CopyOnEscapeMDKind);
+ // Don't check S.IsKnownIncremented() here because it's not
+ // sufficient.
+ S.RRI.KnownSafe = S.IsKnownNested();
S.RRI.Calls.insert(Inst);
}
+ S.SetAtLeastOneRefCount();
S.IncrementRefCount();
- break;
+ S.IncrementNestCount();
+ continue;
}
case IC_Release: {
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrTopDownState(Arg);
S.DecrementRefCount();
+ S.DecrementNestCount();
switch (S.GetSeq()) {
case S_Retain:
@@ -2478,16 +2605,12 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
Sequence Seq = S.GetSeq();
// Check for possible releases.
- if (!IsRetain(Class) && Class != IC_RetainBlock &&
- CanAlterRefCount(Inst, Ptr, PA, Class)) {
- // Check for a release.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
S.DecrementRefCount();
-
- // Check for a release.
switch (Seq) {
case S_Retain:
S.SetSeq(S_CanRelease);
- S.RRI.ReverseInsertPts.clear();
+ assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
// One call can't cause a transition from S_Retain to S_CanRelease
@@ -2511,8 +2634,19 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (CanUse(Inst, Ptr, PA, Class))
S.SetSeq(S_Use);
break;
- case S_Use:
case S_Retain:
+ // A non-copy-on-escape objc_retainBlock call may be responsible for
+ // copying the block data from the stack to the heap. Model this by
+ // moving it straight from S_Retain to S_Use.
+ if (S.RRI.IsRetainBlock &&
+ !S.RRI.CopyOnEscape &&
+ CanUse(Inst, Ptr, PA, Class)) {
+ assert(S.RRI.ReverseInsertPts.empty());
+ S.RRI.ReverseInsertPts.insert(Inst);
+ S.SetSeq(S_Use);
+ }
+ break;
+ case S_Use:
case S_None:
break;
case S_Stop:
@@ -2533,28 +2667,43 @@ ObjCARCOpt::Visit(Function &F,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases) {
- // Use postorder for bottom-up, and reverse-postorder for top-down, because we
+ // Use reverse-postorder on the reverse CFG for bottom-up, because we
// magically know that loops will be well behaved, i.e. they won't repeatedly
- // call retain on a single pointer without doing a release.
+ // call retain on a single pointer without doing a release. We can't use
+ // ReversePostOrderTraversal here because we want to walk up from each
+ // function exit point.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
+ SmallVector<BasicBlock *, 16> Order;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *BB = I;
+ if (BB->getTerminator()->getNumSuccessors() == 0)
+ Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ }
+ while (!Stack.empty()) {
+ pred_iterator End = pred_end(Stack.back().first);
+ while (Stack.back().second != End) {
+ BasicBlock *BB = *Stack.back().second++;
+ if (Visited.insert(BB))
+ Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ }
+ Order.push_back(Stack.pop_back_val().first);
+ }
bool BottomUpNestingDetected = false;
- SmallVector<BasicBlock *, 8> PostOrder;
- for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) {
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ Order.rbegin(), E = Order.rend(); I != E; ++I) {
BasicBlock *BB = *I;
- PostOrder.push_back(BB);
-
BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
}
- // Iterate through the post-order in reverse order, achieving a
- // reverse-postorder traversal. We don't use the ReversePostOrderTraversal
- // class here because it works by computing its own full postorder iteration,
- // recording the sequence, and playing it back in reverse. Since we're already
- // doing a full iteration above, we can just record the sequence manually and
- // avoid the cost of having ReversePostOrderTraversal compute it.
+ // Use regular reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator
- RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI)
- TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases);
+ typedef ReversePostOrderTraversal<Function *> RPOTType;
+ RPOTType RPOT(&F);
+ for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
+ }
return TopDownNestingDetected && BottomUpNestingDetected;
}
@@ -2565,12 +2714,10 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
RRInfo &ReleasesToMove,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts) {
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M) {
Type *ArgTy = Arg->getType();
- Type *ParamTy =
- (RetainRVFunc ? RetainRVFunc :
- RetainFunc ? RetainFunc :
- RetainBlockFunc)->arg_begin()->getType();
+ Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
// Insert the new retain and release calls.
for (SmallPtrSet<Instruction *, 2>::const_iterator
@@ -2581,9 +2728,12 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
new BitCastInst(Arg, ParamTy, "", InsertPt);
CallInst *Call =
CallInst::Create(RetainsToMove.IsRetainBlock ?
- RetainBlockFunc : RetainFunc,
+ getRetainBlockCallee(M) : getRetainCallee(M),
MyArg, "", InsertPt);
Call->setDoesNotThrow();
+ if (RetainsToMove.CopyOnEscape)
+ Call->setMetadata(CopyOnEscapeMDKind,
+ MDNode::get(M->getContext(), ArrayRef<Value *>()));
if (!RetainsToMove.IsRetainBlock)
Call->setTailCall();
}
@@ -2598,8 +2748,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
// The invoke's return value isn't available in the unwind block,
// but our releases will never depend on it, because they must be
// paired with retains from before the invoke.
- InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
- InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
+ InsertPts[0] = II->getNormalDest()->getFirstInsertionPt();
+ InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
} else {
// Insert code immediately after the last use.
InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
@@ -2609,7 +2759,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *InsertPt = *I;
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt);
+ CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+ "", InsertPt);
// Attach a clang.imprecise_release metadata tag, if appropriate.
if (MDNode *M = ReleasesToMove.ReleaseMetadata)
Call->setMetadata(ImpreciseReleaseMDKind, M);
@@ -2640,7 +2791,8 @@ bool
ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
&BBStates,
MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M) {
bool AnyPairsCompletelyEliminated = false;
RRInfo RetainsToMove;
RRInfo ReleasesToMove;
@@ -2649,21 +2801,37 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
SmallVector<Instruction *, 8> DeadInsts;
for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
- E = Retains.end(); I != E; ) {
- Value *V = (I++)->first;
+ E = Retains.end(); I != E; ++I) {
+ Value *V = I->first;
if (!V) continue; // blotted
Instruction *Retain = cast<Instruction>(V);
Value *Arg = GetObjCArg(Retain);
- // If the object being released is in static or stack storage, we know it's
+ // If the object being released is in static storage, we know it's
// not being managed by ObjC reference counting, so we can delete pairs
// regardless of what possible decrements or uses lie between them.
- bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+ bool KnownSafe = isa<Constant>(Arg);
+
+ // Same for stack storage, unless this is a non-copy-on-escape
+ // objc_retainBlock call, which is responsible for copying the block data
+ // from the stack to the heap.
+ if ((!I->second.IsRetainBlock || I->second.CopyOnEscape) &&
+ isa<AllocaInst>(Arg))
+ KnownSafe = true;
+
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
+ if (GV->isConstant())
+ KnownSafe = true;
// If a pair happens in a region where it is known that the reference count
// is already incremented, we can similarly ignore possible decrements.
- bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+ bool KnownSafeTD = true, KnownSafeBU = true;
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
@@ -2683,7 +2851,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
- KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+ KnownSafeTD &= NewRetainRRI.KnownSafe;
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewRetainRRI.Calls.begin(),
LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
@@ -2739,7 +2907,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
Releases.find(NewRelease);
assert(It != Releases.end());
const RRInfo &NewReleaseRRI = It->second;
- KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+ KnownSafeBU &= NewReleaseRRI.KnownSafe;
for (SmallPtrSet<Instruction *, 2>::const_iterator
LI = NewReleaseRRI.Calls.begin(),
LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
@@ -2759,6 +2927,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
// Merge the IsRetainBlock values.
if (FirstRetain) {
RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+ RetainsToMove.CopyOnEscape = NewReleaseRetainRRI.CopyOnEscape;
FirstRetain = false;
} else if (ReleasesToMove.IsRetainBlock !=
NewReleaseRetainRRI.IsRetainBlock)
@@ -2766,6 +2935,9 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
// objc_retain and the other uses objc_retainBlock.
goto next_retain;
+ // Merge the CopyOnEscape values.
+ RetainsToMove.CopyOnEscape &= NewReleaseRetainRRI.CopyOnEscape;
+
// Collect the optimal insertion points.
if (!KnownSafe)
for (SmallPtrSet<Instruction *, 2>::const_iterator
@@ -2787,12 +2959,19 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
if (NewRetains.empty()) break;
}
- // If the pointer is known incremented, we can safely delete the pair
- // regardless of what's between them.
- if (KnownIncrementedTD || KnownIncrementedBU) {
+ // If the pointer is known incremented or nested, we can safely delete the
+ // pair regardless of what's between them.
+ if (KnownSafeTD || KnownSafeBU) {
RetainsToMove.ReverseInsertPts.clear();
ReleasesToMove.ReverseInsertPts.clear();
NewCount = 0;
+ } else {
+ // Determine whether the new insertion points we computed preserve the
+ // balance of retain and release calls through the program.
+ // TODO: If the fully aggressive solution isn't valid, try to find a
+ // less aggressive solution which is.
+ if (NewDelta != 0)
+ goto next_retain;
}
// Determine whether the original call points are balanced in the retain and
@@ -2803,18 +2982,12 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
if (OldDelta != 0)
goto next_retain;
- // Determine whether the new insertion points we computed preserve the
- // balance of retain and release calls through the program.
- // TODO: If the fully aggressive solution isn't valid, try to find a
- // less aggressive solution which is.
- if (NewDelta != 0)
- goto next_retain;
-
// Ok, everything checks out and we're all set. Let's move some code!
Changed = true;
AnyPairsCompletelyEliminated = NewCount == 0;
NumRRs += OldCount - NewCount;
- MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts);
+ MoveCalls(Arg, RetainsToMove, ReleasesToMove,
+ Retains, Releases, DeadInsts, M);
next_retain:
NewReleases.clear();
@@ -2993,7 +3166,8 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
bool NestingDetected = Visit(F, BBStates, Retains, Releases);
// Transform.
- return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected;
+ return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
+ NestingDetected;
}
/// OptimizeReturns - Look for this pattern:
@@ -3072,7 +3246,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
// Check that there is nothing that can affect the reference
// count between the retain and the call.
- FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+ // Note that Retain need not be in BB.
+ FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
DependingInstructions, Visited, PA);
if (DependingInstructions.size() != 1)
goto next_block;
@@ -3116,12 +3291,8 @@ bool ObjCARCOpt::doInitialization(Module &M) {
// Identify the imprecise release metadata kind.
ImpreciseReleaseMDKind =
M.getContext().getMDKindID("clang.imprecise_release");
-
- // Identify the declarations for objc_retain and friends.
- RetainFunc = M.getFunction("objc_retain");
- RetainBlockFunc = M.getFunction("objc_retainBlock");
- RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
- ReleaseFunc = M.getFunction("objc_release");
+ CopyOnEscapeMDKind =
+ M.getContext().getMDKindID("clang.arc.copy_on_escape");
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
@@ -3132,6 +3303,7 @@ bool ObjCARCOpt::doInitialization(Module &M) {
AutoreleaseRVCallee = 0;
ReleaseCallee = 0;
RetainCallee = 0;
+ RetainBlockCallee = 0;
AutoreleaseCallee = 0;
return false;
@@ -3377,7 +3549,7 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
void ObjCARCContract::ContractRelease(Instruction *Release,
inst_iterator &Iter) {
LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
- if (!Load || Load->isVolatile()) return;
+ if (!Load || !Load->isSimple()) return;
// For now, require everything to be in one basic block.
BasicBlock *BB = Release->getParent();
@@ -3393,7 +3565,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
!(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
++I;
StoreInst *Store = dyn_cast<StoreInst>(I);
- if (!Store || Store->isVolatile()) return;
+ if (!Store || !Store->isSimple()) return;
if (Store->getPointerOperand() != Loc.Ptr) return;
Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index e6341ae307..8f98a5b650 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -309,7 +309,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
std::swap(LHS, RHS);
bool Success = !I->swapOperands();
assert(Success && "swapOperands failed");
- Success = false;
+ (void)Success;
MadeChange = true;
} else if (RHSBO) {
// Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 5b12c92762..196a847fc0 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -156,7 +156,7 @@ namespace {
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const TargetData *TD;
- SmallPtrSet<BasicBlock*, 8> BBExecutable;// The BBs that are executable.
+ SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
/// StructValueState - This maintains ValueState for values that have
@@ -471,9 +471,9 @@ private:
/// UsersOfOverdefinedPHIs map for PN, remove them now.
void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) {
if (UsersOfOverdefinedPHIs.empty()) return;
- std::multimap<PHINode*, Instruction*>::iterator It, E;
- tie(It, E) = UsersOfOverdefinedPHIs.equal_range(PN);
- while (It != E) {
+ typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
+ std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (ItTy It = Range.first, E = Range.second; It != E;) {
if (It->second == I)
UsersOfOverdefinedPHIs.erase(It++);
else
@@ -486,9 +486,9 @@ private:
/// (Duplicate entries do not break anything directly, but can lead to
/// exponential growth of the table in rare cases.)
void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
- std::multimap<PHINode*, Instruction*>::iterator J, E;
- tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN);
- for (; J != E; ++J)
+ typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
+ std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (ItTy J = Range.first, E = Range.second; J != E; ++J)
if (J->second == I)
return;
UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
@@ -515,6 +515,7 @@ private:
void visitShuffleVectorInst(ShuffleVectorInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
+ void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
// Instructions that cannot be folded away.
void visitStoreInst (StoreInst &I);
@@ -528,8 +529,12 @@ private:
visitTerminatorInst(II);
}
void visitCallSite (CallSite CS);
+ void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
+ void visitFenceInst (FenceInst &I) { /*returns void*/ }
+ void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); }
+ void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
@@ -577,6 +582,10 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ if (TI.getNumSuccessors() < 2) {
+ Succs[0] = true;
+ return;
+ }
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
@@ -637,6 +646,9 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
return true;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (SI->getNumSuccessors() < 2)
+ return true;
+
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
@@ -692,13 +704,14 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// There may be instructions using this PHI node that are not overdefined
// themselves. If so, make sure that they know that the PHI node operand
// changed.
- std::multimap<PHINode*, Instruction*>::iterator I, E;
- tie(I, E) = UsersOfOverdefinedPHIs.equal_range(&PN);
- if (I == E)
+ typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
+ std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(&PN);
+
+ if (Range.first == Range.second)
return;
SmallVector<Instruction*, 16> Users;
- for (; I != E; ++I)
+ for (ItTy I = Range.first, E = Range.second; I != E; ++I)
Users.push_back(I->second);
while (!Users.empty())
visit(Users.pop_back_val());
@@ -1179,8 +1192,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
}
Constant *Ptr = Operands[0];
- markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0]+1,
- Operands.size()-1));
+ ArrayRef<Constant *> Indices(Operands.begin() + 1, Operands.end());
+ markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
@@ -1420,66 +1433,115 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (I->getType()->isVoidTy()) continue;
if (StructType *STy = dyn_cast<StructType>(I->getType())) {
- // Only a few things that can be structs matter for undef. Just send
- // all their results to overdefined. We could be more precise than this
- // but it isn't worth bothering.
- if (isa<CallInst>(I) || isa<SelectInst>(I)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- LatticeVal &LV = getStructValueState(I, i);
- if (LV.isUndefined())
- markOverdefined(LV, I);
- }
+ // Only a few things that can be structs matter for undef.
+
+ // Tracked calls must never be marked overdefined in ResolvedUndefsIn.
+ if (CallSite CS = CallSite(I))
+ if (Function *F = CS.getCalledFunction())
+ if (MRVFunctionsTracked.count(F))
+ continue;
+
+ // extractvalue and insertvalue don't need to be marked; they are
+ // tracked as precisely as their operands.
+ if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
+ continue;
+
+ // Send the results of everything else to overdefined. We could be
+ // more precise than this but it isn't worth bothering.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ LatticeVal &LV = getStructValueState(I, i);
+ if (LV.isUndefined())
+ markOverdefined(LV, I);
}
continue;
}
-
+
LatticeVal &LV = getValueState(I);
if (!LV.isUndefined()) continue;
- // No instructions using structs need disambiguation.
- if (I->getOperand(0)->getType()->isStructTy())
+ // extractvalue is safe; check here because the argument is a struct.
+ if (isa<ExtractValueInst>(I))
continue;
- // Get the lattice values of the first two operands for use below.
+ // Compute the operand LatticeVals, for convenience below.
+ // Anything taking a struct is conservatively assumed to require
+ // overdefined markings.
+ if (I->getOperand(0)->getType()->isStructTy()) {
+ markOverdefined(I);
+ return true;
+ }
LatticeVal Op0LV = getValueState(I->getOperand(0));
LatticeVal Op1LV;
if (I->getNumOperands() == 2) {
- // No instructions using structs need disambiguation.
- if (I->getOperand(1)->getType()->isStructTy())
- continue;
-
- // If this is a two-operand instruction, and if both operands are
- // undefs, the result stays undef.
+ if (I->getOperand(1)->getType()->isStructTy()) {
+ markOverdefined(I);
+ return true;
+ }
+
Op1LV = getValueState(I->getOperand(1));
- if (Op0LV.isUndefined() && Op1LV.isUndefined())
- continue;
}
-
// If this is an instruction whose result is defined even if the input is
// not fully defined, propagate the information.
Type *ITy = I->getType();
switch (I->getOpcode()) {
- default: break; // Leave the instruction as an undef.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ break; // Any undef -> undef
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ // Floating-point binary operation: be conservative.
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ else
+ markOverdefined(I);
+ return true;
case Instruction::ZExt:
- // After a zero extend, we know the top part is zero. SExt doesn't have
- // to be handled here, because we don't know whether the top part is 1's
- // or 0's.
- case Instruction::SIToFP: // some FP values are not possible, just use 0.
- case Instruction::UIToFP: // some FP values are not possible, just use 0.
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ // undef -> 0; some outputs are impossible
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Mul:
case Instruction::And:
+ // Both operands undef -> undef
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ break;
// undef * X -> 0. X could be zero.
// undef & X -> 0. X could be zero.
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Or:
+ // Both operands undef -> undef
+ if (Op0LV.isUndefined() && Op1LV.isUndefined())
+ break;
// undef | X -> -1. X could be -1.
markForcedConstant(I, Constant::getAllOnesValue(ITy));
return true;
+ case Instruction::Xor:
+ // undef ^ undef -> 0; strictly speaking, this is not
+ // necessary, but we try to be nice to people who expect this
+ // behavior in simple cases
+ if (Op0LV.isUndefined() && Op1LV.isUndefined()) {
+ markForcedConstant(I, Constant::getNullValue(ITy));
+ return true;
+ }
+ // undef ^ X -> undef
+ break;
+
case Instruction::SDiv:
case Instruction::UDiv:
case Instruction::SRem:
@@ -1494,26 +1556,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
case Instruction::AShr:
- // undef >>s X -> undef. No change.
- if (Op0LV.isUndefined()) break;
-
- // X >>s undef -> X. X could be 0, X could have the high-bit known set.
- if (Op0LV.isConstant())
- markForcedConstant(I, Op0LV.getConstant());
- else
- markOverdefined(I);
+ // X >>a undef -> undef.
+ if (Op1LV.isUndefined()) break;
+
+ // undef >>a X -> all ones
+ markForcedConstant(I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::LShr:
case Instruction::Shl:
- // undef >> X -> undef. No change.
- // undef << X -> undef. No change.
- if (Op0LV.isUndefined()) break;
-
- // X >> undef -> 0. X could be 0.
- // X << undef -> 0. X could be 0.
+ // X << undef -> undef.
+ // X >> undef -> undef.
+ if (Op1LV.isUndefined()) break;
+
+ // undef << X -> 0
+ // undef >> X -> 0
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
case Instruction::Select:
+ Op1LV = getValueState(I->getOperand(1));
// undef ? X : Y -> X or Y. There could be commonality between X/Y.
if (Op0LV.isUndefined()) {
if (!Op1LV.isConstant()) // Pick the constant one if there is any.
@@ -1533,9 +1593,35 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
else
markOverdefined(I);
return true;
+ case Instruction::Load:
+ // A load here means one of two things: a load of undef from a global,
+ // or a load from an unknown pointer. Either way, having it return undef
+ // is okay.
+ break;
+ case Instruction::ICmp:
+ // X == undef -> undef. Other comparisons get more complicated.
+ if (cast<ICmpInst>(I)->isEquality())
+ break;
+ markOverdefined(I);
+ return true;
case Instruction::Call:
- // If a call has an undef result, it is because it is constant foldable
- // but one of the inputs was undef. Just force the result to
+ case Instruction::Invoke: {
+ // There are two reasons a call can have an undef result
+ // 1. It could be tracked.
+ // 2. It could be constant-foldable.
+ // Because of the way we solve return values, tracked calls must
+ // never be marked overdefined in ResolvedUndefsIn.
+ if (Function *F = CallSite(I).getCalledFunction())
+ if (TrackedRetVals.count(F))
+ break;
+
+ // If the call is constant-foldable, we mark it overdefined because
+ // we do not know what return values are valid.
+ markOverdefined(I);
+ return true;
+ }
+ default:
+ // If we don't know what should happen here, conservatively mark it
// overdefined.
markOverdefined(I);
return true;
@@ -1621,15 +1707,25 @@ FunctionPass *llvm::createSCCPPass() {
static void DeleteInstructionInBlock(BasicBlock *BB) {
DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
++NumDeadBlocks;
-
- // Delete the instructions backwards, as it has a reduced likelihood of
- // having to update as many def-use and use-def chains.
- while (!isa<TerminatorInst>(BB->begin())) {
- Instruction *I = --BasicBlock::iterator(BB->getTerminator());
-
- if (!I->use_empty())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- BB->getInstList().erase(I);
+
+ // Check to see if there are non-terminating instructions to delete.
+ if (isa<TerminatorInst>(BB->begin()))
+ return;
+
+ // Delete the instructions backwards, as it has a reduced likelihood of having
+ // to update as many def-use and use-def chains.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
+ }
+ BB->getInstList().erase(Inst);
++NumInstRemoved;
}
}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 302c287d3c..f6918deafe 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -63,7 +63,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeCFGSimplifyPassPass(Registry);
initializeSimplifyLibCallsPass(Registry);
initializeSinkingPass(Registry);
- initializeTailDupPass(Registry);
initializeTailCallElimPass(Registry);
}
@@ -187,3 +186,7 @@ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBasicAliasAnalysisPass());
}
+
+void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerExpectIntrinsicPass());
+}
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index fbf309292d..c6d9123d66 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -145,6 +145,9 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
+ void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts);
void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
@@ -295,8 +298,6 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
if (ScalarKind == Unknown)
ScalarKind = Integer;
- // FIXME: It should be possible to promote the vector type up to the alloca's
- // size.
if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
ScalarKind = Integer;
@@ -331,16 +332,12 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// (VectorTy) so far at the offset specified by Offset (which is specified in
/// bytes).
///
-/// There are three cases we handle here:
+/// There are two cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
-/// <2 x float> and <4 x float>. Here we turn element accesses into insert
-/// and extract element operations, and <2 x float> accesses into a cast to
-/// <2 x double>, an extract, and a cast back to <2 x float>.
-/// 3) A fully general blob of memory, which we turn into some (potentially
+/// 2) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
@@ -371,20 +368,13 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
+ (!VectorTy || EltSize == VectorTy->getElementType()
+ ->getPrimitiveSizeInBits()/8)) {
if (!VectorTy) {
ScalarKind = ImplicitVector;
VectorTy = VectorType::get(In, AllocaSize/EltSize);
- return;
}
-
- unsigned CurrentEltSize = VectorTy->getElementType()
- ->getPrimitiveSizeInBits()/8;
- if (EltSize == CurrentEltSize)
- return;
-
- if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
- return;
+ return;
}
}
@@ -397,72 +387,19 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
uint64_t Offset) {
- // TODO: Support nonzero offsets?
- if (Offset != 0)
- return false;
-
- // Only allow vectors that are a power-of-2 away from the size of the alloca.
- if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
- return false;
-
- // If this the first vector we see, remember the type so that we know the
- // element size.
- if (!VectorTy) {
+ if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
+ // If we're storing/loading a vector of the right size, allow it as a
+ // vector. If this is the first vector we see, remember the type so that
+ // we know the element size. If this is a subsequent access, ignore it
+ // even if it is a differing type but the same size. Worst case we can
+ // bitcast the resultant vectors.
+ if (!VectorTy)
+ VectorTy = VInTy;
ScalarKind = Vector;
- VectorTy = VInTy;
return true;
}
- unsigned BitWidth = VectorTy->getBitWidth();
- unsigned InBitWidth = VInTy->getBitWidth();
-
- // Vectors of the same size can be converted using a simple bitcast.
- if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
- ScalarKind = Vector;
- return true;
- }
-
- Type *ElementTy = VectorTy->getElementType();
- Type *InElementTy = VInTy->getElementType();
-
- // Do not allow mixed integer and floating-point accesses from vectors of
- // different sizes.
- if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
- return false;
-
- if (ElementTy->isFloatingPointTy()) {
- // Only allow floating-point vectors of different sizes if they have the
- // same element type.
- // TODO: This could be loosened a bit, but would anything benefit?
- if (ElementTy != InElementTy)
- return false;
-
- // There are no arbitrary-precision floating-point types, which limits the
- // number of legal vector types with larger element types that we can form
- // to bitcast and extract a subvector.
- // TODO: We could support some more cases with mixed fp128 and double here.
- if (!(BitWidth == 64 || BitWidth == 128) ||
- !(InBitWidth == 64 || InBitWidth == 128))
- return false;
- } else {
- assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
- "or floating-point.");
- unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
- unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
-
- // Do not allow integer types smaller than a byte or types whose widths are
- // not a multiple of a byte.
- if (BitWidth < 8 || InBitWidth < 8 ||
- BitWidth % 8 != 0 || InBitWidth % 8 != 0)
- return false;
- }
-
- // Pick the largest of the two vector types.
- ScalarKind = Vector;
- if (InBitWidth > BitWidth)
- VectorTy = VInTy;
-
- return true;
+ return false;
}
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
@@ -480,7 +417,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// Don't break volatile or atomic loads.
- if (LI->isVolatile())
+ if (!LI->isSimple())
return false;
// Don't touch MMX operations.
if (LI->getType()->isX86_MMXTy())
@@ -492,7 +429,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Storing the pointer, not into the value?
- if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ if (SI->getOperand(0) == V || !SI->isSimple()) return false;
// Don't touch MMX operations.
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
@@ -502,7 +439,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
}
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
- IsNotTrivial = true; // Can't be mem2reg'd.
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset))
return false;
continue;
@@ -560,6 +498,14 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
continue;
}
+ // If this is a lifetime intrinsic, we can handle it.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ continue;
+ }
+ }
+
// Otherwise, we cannot handle this!
return false;
}
@@ -599,7 +545,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// The load is a bit extract from NewAI shifted right by Offset bits.
- Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
+ Value *LoadedVal = Builder.CreateLoad(NewAI);
Value *NewLoadVal
= ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
LI->replaceAllUsesWith(NewLoadVal);
@@ -703,65 +649,18 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
continue;
}
- llvm_unreachable("Unsupported operation!");
- }
-}
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ // There's no need to preserve these, as the resulting alloca will be
+ // converted to a register anyways.
+ II->eraseFromParent();
+ continue;
+ }
+ }
-/// getScaledElementType - Gets a scaled element type for a partial vector
-/// access of an alloca. The input types must be integer or floating-point
-/// scalar or vector types, and the resulting type is an integer, float or
-/// double.
-static Type *getScaledElementType(Type *Ty1, Type *Ty2,
- unsigned NewBitWidth) {
- bool IsFP1 = Ty1->isFloatingPointTy() ||
- (Ty1->isVectorTy() &&
- cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
- bool IsFP2 = Ty2->isFloatingPointTy() ||
- (Ty2->isVectorTy() &&
- cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
-
- LLVMContext &Context = Ty1->getContext();
-
- // Prefer floating-point types over integer types, as integer types may have
- // been created by earlier scalar replacement.
- if (IsFP1 || IsFP2) {
- if (NewBitWidth == 32)
- return Type::getFloatTy(Context);
- if (NewBitWidth == 64)
- return Type::getDoubleTy(Context);
+ llvm_unreachable("Unsupported operation!");
}
-
- return Type::getIntNTy(Context, NewBitWidth);
-}
-
-/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
-/// to another vector of the same element type which has the same allocation
-/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
-static Value *CreateShuffleVectorCast(Value *FromVal, Type *ToType,
- IRBuilder<> &Builder) {
- Type *FromType = FromVal->getType();
- VectorType *FromVTy = cast<VectorType>(FromType);
- VectorType *ToVTy = cast<VectorType>(ToType);
- assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
- "Vectors must have the same element type");
- Value *UnV = UndefValue::get(FromType);
- unsigned numEltsFrom = FromVTy->getNumElements();
- unsigned numEltsTo = ToVTy->getNumElements();
-
- SmallVector<Constant*, 3> Args;
- Type* Int32Ty = Builder.getInt32Ty();
- unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
- unsigned i;
- for (i=0; i != minNumElts; ++i)
- Args.push_back(ConstantInt::get(Int32Ty, i));
-
- if (i < numEltsTo) {
- Constant* UnC = UndefValue::get(Int32Ty);
- for (; i != numEltsTo; ++i)
- Args.push_back(UnC);
- }
- Constant *Mask = ConstantVector::get(Args);
- return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
}
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
@@ -787,38 +686,8 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
- if (FromTypeSize == ToTypeSize) {
- // If the two types have the same primitive size, use a bit cast.
- // Otherwise, it is two vectors with the same element type that has
- // the same allocation size but different number of elements so use
- // a shuffle vector.
- if (FromType->getPrimitiveSizeInBits() ==
- ToType->getPrimitiveSizeInBits())
- return Builder.CreateBitCast(FromVal, ToType, "tmp");
- else
- return CreateShuffleVectorCast(FromVal, ToType, Builder);
- }
-
- if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
- assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
- "of a smaller vector type at a nonzero offset.");
-
- Type *CastElementTy = getScaledElementType(FromType, ToType,
- ToTypeSize * 8);
- unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;
-
- LLVMContext &Context = FromVal->getContext();
- Type *CastTy = VectorType::get(CastElementTy,
- NumCastVectorElements);
- Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
-
- unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
- unsigned Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
- Type::getInt32Ty(Context), Elt), "tmp");
- return Builder.CreateBitCast(Extract, ToType, "tmp");
- }
+ if (FromTypeSize == ToTypeSize)
+ return Builder.CreateBitCast(FromVal, ToType);
// Otherwise it must be an element access.
unsigned Elt = 0;
@@ -828,10 +697,9 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
// Return the element extracted out of it.
- Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
- Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
+ Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
if (V->getType() != ToType)
- V = Builder.CreateBitCast(V, ToType, "tmp");
+ V = Builder.CreateBitCast(V, ToType);
return V;
}
@@ -844,7 +712,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
@@ -855,7 +723,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
Offset+i*EltSize, Builder);
- Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
+ Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
}
@@ -881,33 +749,31 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
// only some bits are used.
if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateLShr(FromVal,
- ConstantInt::get(FromVal->getType(),
- ShAmt), "tmp");
+ ConstantInt::get(FromVal->getType(), ShAmt));
else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
FromVal = Builder.CreateShl(FromVal,
- ConstantInt::get(FromVal->getType(),
- -ShAmt), "tmp");
+ ConstantInt::get(FromVal->getType(), -ShAmt));
// Finally, unconditionally truncate the integer to the right width.
unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
+ LIBitWidth));
else if (LIBitWidth > NTy->getBitWidth())
FromVal =
Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
- LIBitWidth), "tmp");
+ LIBitWidth));
// If the result is an integer, this is a trunc or bitcast.
if (ToType->isIntegerTy()) {
// Should be done.
} else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
// Just do a bitcast, we know the sizes match up.
- FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
+ FromVal = Builder.CreateBitCast(FromVal, ToType);
} else {
// Otherwise must be a pointer.
- FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
+ FromVal = Builder.CreateIntToPtr(FromVal, ToType);
}
assert(FromVal->getType() == ToType && "Didn't convert right?");
return FromVal;
@@ -936,56 +802,21 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// Changing the whole vector with memset or with an access of a different
// vector type?
- if (ValSize == VecSize) {
- // If the two types have the same primitive size, use a bit cast.
- // Otherwise, it is two vectors with the same element type that has
- // the same allocation size but different number of elements so use
- // a shuffle vector.
- if (VTy->getPrimitiveSizeInBits() ==
- SV->getType()->getPrimitiveSizeInBits())
- return Builder.CreateBitCast(SV, AllocaType, "tmp");
- else
- return CreateShuffleVectorCast(SV, VTy, Builder);
- }
-
- if (isPowerOf2_64(VecSize / ValSize)) {
- assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
- "value of a smaller vector type at a nonzero offset.");
-
- Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
- ValSize);
- unsigned NumCastVectorElements = VecSize / ValSize;
-
- LLVMContext &Context = SV->getContext();
- Type *OldCastTy = VectorType::get(CastElementTy,
- NumCastVectorElements);
- Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
-
- Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
-
- unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
- unsigned Elt = Offset/EltSize;
- assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
- Value *Insert =
- Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
- Type::getInt32Ty(Context), Elt), "tmp");
- return Builder.CreateBitCast(Insert, AllocaType, "tmp");
- }
+ if (ValSize == VecSize)
+ return Builder.CreateBitCast(SV, AllocaType);
// Must be an element insertion.
assert(SV->getType() == VTy->getElementType());
uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
unsigned Elt = Offset/EltSize;
- return Builder.CreateInsertElement(Old, SV,
- ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
- "tmp");
+ return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
}
// If SV is a first-class aggregate value, insert each value recursively.
if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
Builder);
@@ -996,7 +827,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
+ Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
}
return Old;
@@ -1009,20 +840,19 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
- SV = Builder.CreateBitCast(SV,
- IntegerType::get(SV->getContext(),SrcWidth), "tmp");
+ SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp");
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
if (SV->getType()->getPrimitiveSizeInBits() <
AllocaType->getPrimitiveSizeInBits())
- SV = Builder.CreateZExt(SV, AllocaType, "tmp");
+ SV = Builder.CreateZExt(SV, AllocaType);
else {
// Truncation may be needed if storing more than the alloca can hold
// (undefined behavior).
- SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
+ SV = Builder.CreateTrunc(SV, AllocaType);
SrcWidth = DestWidth;
SrcStoreWidth = DestStoreWidth;
}
@@ -1045,12 +875,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// only some bits in the structure are set.
APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
- SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
- ShAmt), "tmp");
+ SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
Mask <<= ShAmt;
} else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
- SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
- -ShAmt), "tmp");
+ SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
Mask = Mask.lshr(-ShAmt);
}
@@ -1196,7 +1024,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || LI->isVolatile()) return false;
+ if (LI == 0 || !LI->isSimple()) return false;
// Both operands to the select need to be dereferencable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
@@ -1237,7 +1065,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
UI != UE; ++UI) {
LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || LI->isVolatile()) return false;
+ if (LI == 0 || !LI->isSimple()) return false;
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
@@ -1258,17 +1086,21 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
// trapping load in the predecessor if it is a critical edge.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the terminator of the predecessor has side-effects (an invoke),
+ // there is no safe place to put a load in the predecessor.
+ if (Pred->getTerminator()->mayHaveSideEffects())
+ return false;
+
+ // If the value is produced by the terminator of the predecessor
+ // (an invoke), there is no valid place to put a load in the predecessor.
+ if (Pred->getTerminator() == InVal)
+ return false;
// If the predecessor has a single successor, then the edge isn't critical.
if (Pred->getTerminator()->getNumSuccessors() == 1)
continue;
-
- Value *InVal = PN->getIncomingValue(i);
-
- // If the InVal is an invoke in the pred, we can't put a load on the edge.
- if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
- if (II->getParent() == Pred)
- return false;
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
@@ -1295,13 +1127,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
UI != UE; ++UI) {
User *U = *UI;
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return false;
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI || SI->isVolatile())
+ if (SI->getOperand(0) == AI || !SI->isSimple())
return false; // Don't allow a store OF the AI, only INTO the AI.
continue;
}
@@ -1343,6 +1175,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
continue;
}
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (onlyUsedByLifetimeMarkers(BCI)) {
+ InstsToRewrite.insert(BCI);
+ continue;
+ }
+ }
+
return false;
}
@@ -1354,6 +1193,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
// If we have instructions that need to be rewritten for this to be promotable
// take care of it now.
for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
+ // This could only be a bitcast used by nothing but lifetime intrinsics.
+ for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end();
+ I != E;) {
+ Use &U = I.getUse();
+ ++I;
+ cast<Instruction>(U.getUser())->eraseFromParent();
+ }
+ BCI->eraseFromParent();
+ continue;
+ }
+
if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
// Selects in InstsToRewrite only have load uses. Rewrite each as two
// loads with a new select.
@@ -1670,7 +1521,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
UI.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return MarkUnsafe(Info, User);
Type *LIType = LI->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
@@ -1679,13 +1530,17 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (SI->isVolatile() || SI->getOperand(0) == I)
+ if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
Type *SIType = SI->getOperand(0)->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
SIType, true, Info, SI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return MarkUnsafe(Info, User);
} else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
isSafePHISelectUseForScalarRepl(User, Offset, Info);
} else {
@@ -1725,7 +1580,7 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (LI->isVolatile())
+ if (!LI->isSimple())
return MarkUnsafe(Info, User);
Type *LIType = LI->getType();
isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
@@ -1734,7 +1589,7 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (SI->isVolatile() || SI->getOperand(0) == I)
+ if (!SI->isSimple() || SI->getOperand(0) == I)
return MarkUnsafe(Info, User);
Type *SIType = SI->getOperand(0)->getType();
@@ -1923,6 +1778,14 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// address operand will be updated, so nothing else needs to be done.
continue;
}
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ RewriteLifetimeIntrinsic(II, AI, Offset, NewElts);
+ }
+ continue;
+ }
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
Type *LIType = LI->getType();
@@ -2080,8 +1943,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
}
Instruction *Val = NewElts[Idx];
if (NewArgs.size() > 1) {
- Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
- NewArgs.end(), "", GEPI);
+ Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
Val->takeName(GEPI);
}
if (Val->getType() != GEPI->getType())
@@ -2090,6 +1952,62 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
DeadInsts.push_back(GEPI);
}
+/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
+/// to mark the lifetime of the scalarized memory.
+void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
+ uint64_t Offset,
+ SmallVector<AllocaInst*, 32> &NewElts) {
+ ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
+ // Put matching lifetime markers on everything from Offset up to
+ // Offset+OldSize.
+ Type *AIType = AI->getAllocatedType();
+ uint64_t NewOffset = Offset;
+ Type *IdxTy;
+ uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
+
+ IRBuilder<> Builder(II);
+ uint64_t Size = OldSize->getLimitedValue();
+
+ if (NewOffset) {
+ // Splice the first element and index 'NewOffset' bytes in. SROA will
+ // split the alloca again later.
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
+ V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
+
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
+ ++Idx;
+ }
+
+ for (; Idx != NewElts.size() && Size; ++Idx) {
+ IdxTy = NewElts[Idx]->getAllocatedType();
+ uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
+ if (EltSize > Size) {
+ EltSize = Size;
+ Size = 0;
+ } else {
+ Size -= EltSize;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Builder.CreateLifetimeStart(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ else
+ Builder.CreateLifetimeEnd(NewElts[Idx],
+ Builder.getInt64(EltSize));
+ }
+ DeadInsts.push_back(II);
+}
+
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
@@ -2157,7 +2075,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
if (OtherPtr) {
Value *Idx[2] = { Zero,
ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
- OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
+ OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx,
OtherPtr->getName()+"."+Twine(i),
MI);
uint64_t EltOffset;
@@ -2226,8 +2144,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
assert(StoreVal->getType() == ValTy && "Type mismatch!");
// If the requested value was a vector constant, create it.
- if (EltTy != ValTy) {
- unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
+ if (EltTy->isVectorTy()) {
+ unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
StoreVal = ConstantVector::get(Elts);
}
@@ -2574,7 +2492,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Ignore non-volatile loads, they are always ok.
- if (LI->isVolatile()) return false;
+ if (!LI->isSimple()) return false;
continue;
}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index ad52417f7f..fbb9465743 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -338,16 +338,17 @@ struct StrCmpOpt : public LibCallOptimization {
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
- if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
- return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
-
- if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
// strcmp(x, y) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2)
return ConstantInt::get(CI->getType(),
- strcmp(Str1.c_str(),Str2.c_str()));
+ StringRef(Str1).compare(Str2));
+
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
+
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
@@ -400,16 +401,20 @@ struct StrNCmpOpt : public LibCallOptimization {
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
- if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> *x
- return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
+ // strncmp(x, y, n) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ StringRef SubStr1 = StringRef(Str1).substr(0, Length);
+ StringRef SubStr2 = StringRef(Str2).substr(0, Length);
+ return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
+ }
+
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+ CI->getType()));
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
- // strncmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(),
- strncmp(Str1.c_str(), Str2.c_str(), Length));
return 0;
}
};
@@ -874,8 +879,8 @@ struct PowOpt : public LibCallOptimization {
Callee->getAttributes());
Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
Callee->getAttributes());
- Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp");
- Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp");
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
return Sel;
}
@@ -908,10 +913,10 @@ struct Exp2Opt : public LibCallOptimization {
Value *LdExpArg = 0;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
} else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
}
if (LdExpArg) {
@@ -996,10 +1001,10 @@ struct FFSOpt : public LibCallOptimization {
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, Op, "cttz");
- V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
- V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+ V = B.CreateIntCast(V, B.getInt32Ty(), false);
- Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
return B.CreateSelect(Cond, V, B.getInt32(0));
}
};
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 705f442049..c83f56c4d2 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -153,9 +153,13 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
SmallPtrSet<Instruction *, 8> &Stores) {
- if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
- if (L->isVolatile()) return false;
+ if (Inst->mayWriteToMemory()) {
+ Stores.insert(Inst);
+ return false;
+ }
+
+ if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
AliasAnalysis::Location Loc = AA->getLocation(L);
for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
E = Stores.end(); I != E; ++I)
@@ -163,11 +167,6 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
return false;
}
- if (Inst->mayWriteToMemory()) {
- Stores.insert(Inst);
- return false;
- }
-
if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
return false;
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
deleted file mode 100644
index 9dd83c04fa..0000000000
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ /dev/null
@@ -1,373 +0,0 @@
-//===- TailDuplication.cpp - Simplify CFG through tail duplication --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass performs a limited form of tail duplication, intended to simplify
-// CFGs by removing some unconditional branches. This pass is necessary to
-// straighten out loops created by the C front-end, but also is capable of
-// making other code nicer. After this pass is run, the CFG simplify pass
-// should be run to clean up the mess.
-//
-// This pass could be enhanced in the future to use profile information to be
-// more aggressive.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "tailduplicate"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constant.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <map>
-using namespace llvm;
-
-STATISTIC(NumEliminated, "Number of unconditional branches eliminated");
-
-static cl::opt<unsigned>
-TailDupThreshold("taildup-threshold",
- cl::desc("Max block size to tail duplicate"),
- cl::init(1), cl::Hidden);
-
-namespace {
- class TailDup : public FunctionPass {
- bool runOnFunction(Function &F);
- public:
- static char ID; // Pass identification, replacement for typeid
- TailDup() : FunctionPass(ID) {
- initializeTailDupPass(*PassRegistry::getPassRegistry());
- }
-
- private:
- inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
- inline void eliminateUnconditionalBranch(BranchInst *BI);
- SmallPtrSet<BasicBlock*, 4> CycleDetector;
- };
-}
-
-char TailDup::ID = 0;
-INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
-
-// Public interface to the Tail Duplication pass
-FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
-
-/// runOnFunction - Top level algorithm - Loop over each unconditional branch in
-/// the function, eliminating it if it looks attractive enough. CycleDetector
-/// prevents infinite loops by checking that we aren't redirecting a branch to
-/// a place it already pointed to earlier; see PR 2323.
-bool TailDup::runOnFunction(Function &F) {
- bool Changed = false;
- CycleDetector.clear();
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- if (shouldEliminateUnconditionalBranch(I->getTerminator(),
- TailDupThreshold)) {
- eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
- Changed = true;
- } else {
- ++I;
- CycleDetector.clear();
- }
- }
- return Changed;
-}
-
-/// shouldEliminateUnconditionalBranch - Return true if this branch looks
-/// attractive to eliminate. We eliminate the branch if the destination basic
-/// block has <= 5 instructions in it, not counting PHI nodes. In practice,
-/// since one of these is a terminator instruction, this means that we will add
-/// up to 4 instructions to the new block.
-///
-/// We don't count PHI nodes in the count since they will be removed when the
-/// contents of the block are copied over.
-///
-bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,
- unsigned Threshold) {
- BranchInst *BI = dyn_cast<BranchInst>(TI);
- if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch!
-
- BasicBlock *Dest = BI->getSuccessor(0);
- if (Dest == BI->getParent()) return false; // Do not loop infinitely!
-
- // Do not inline a block if we will just get another branch to the same block!
- TerminatorInst *DTI = Dest->getTerminator();
- if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
- if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
- return false; // Do not loop infinitely!
-
- // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
- // because doing so would require breaking critical edges. This should be
- // fixed eventually.
- if (!DTI->use_empty())
- return false;
-
- // Do not bother with blocks with only a single predecessor: simplify
- // CFG will fold these two blocks together!
- pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
- ++PI;
- if (PI == PE) return false; // Exactly one predecessor!
-
- BasicBlock::iterator I = Dest->getFirstNonPHI();
-
- for (unsigned Size = 0; I != Dest->end(); ++I) {
- if (Size == Threshold) return false; // The block is too large.
-
- // Don't tail duplicate call instructions. They are very large compared to
- // other instructions.
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false;
-
- // Also alloca and malloc.
- if (isa<AllocaInst>(I)) return false;
-
- // Some vector instructions can expand into a number of instructions.
- if (isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) ||
- isa<InsertElementInst>(I)) return false;
-
- // Only count instructions that are not debugger intrinsics.
- if (!isa<DbgInfoIntrinsic>(I)) ++Size;
- }
-
- // Do not tail duplicate a block that has thousands of successors into a block
- // with a single successor if the block has many other predecessors. This can
- // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
- // cases that have a large number of indirect gotos.
- unsigned NumSuccs = DTI->getNumSuccessors();
- if (NumSuccs > 8) {
- unsigned TooMany = 128;
- if (NumSuccs >= TooMany) return false;
- TooMany = TooMany/NumSuccs;
- for (; PI != PE; ++PI)
- if (TooMany-- == 0) return false;
- }
-
- // If this unconditional branch is a fall-through, be careful about
- // tail duplicating it. In particular, we don't want to taildup it if the
- // original block will still be there after taildup is completed: doing so
- // would eliminate the fall-through, requiring unconditional branches.
- Function::iterator DestI = Dest;
- if (&*--DestI == BI->getParent()) {
- // The uncond branch is a fall-through. Tail duplication of the block is
- // will eliminate the fall-through-ness and end up cloning the terminator
- // at the end of the Dest block. Since the original Dest block will
- // continue to exist, this means that one or the other will not be able to
- // fall through. One typical example that this helps with is code like:
- // if (a)
- // foo();
- // if (b)
- // foo();
- // Cloning the 'if b' block into the end of the first foo block is messy.
-
- // The messy case is when the fall-through block falls through to other
- // blocks. This is what we would be preventing if we cloned the block.
- DestI = Dest;
- if (++DestI != Dest->getParent()->end()) {
- BasicBlock *DestSucc = DestI;
- // If any of Dest's successors are fall-throughs, don't do this xform.
- for (succ_iterator SI = succ_begin(Dest), SE = succ_end(Dest);
- SI != SE; ++SI)
- if (*SI == DestSucc)
- return false;
- }
- }
-
- // Finally, check that we haven't redirected to this target block earlier;
- // there are cases where we loop forever if we don't check this (PR 2323).
- if (!CycleDetector.insert(Dest))
- return false;
-
- return true;
-}
-
-/// FindObviousSharedDomOf - We know there is a branch from SrcBlock to
-/// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we
-/// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and
-/// DstBlock, return it.
-static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
- BasicBlock *DstBlock) {
- // SrcBlock must have a single predecessor.
- pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock);
- if (PI == PE || ++PI != PE) return 0;
-
- BasicBlock *SrcPred = *pred_begin(SrcBlock);
-
- // Look at the predecessors of DstBlock. One of them will be SrcBlock. If
- // there is only one other pred, get it, otherwise we can't handle it.
- PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
- BasicBlock *DstOtherPred = 0;
- BasicBlock *P = *PI;
- if (P == SrcBlock) {
- if (++PI == PE) return 0;
- DstOtherPred = *PI;
- if (++PI != PE) return 0;
- } else {
- DstOtherPred = P;
- if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
- }
-
- // We can handle two situations here: "if then" and "if then else" blocks. An
- // 'if then' situation is just where DstOtherPred == SrcPred.
- if (DstOtherPred == SrcPred)
- return SrcPred;
-
- // Check to see if we have an "if then else" situation, which means that
- // DstOtherPred will have a single predecessor and it will be SrcPred.
- PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred);
- if (PI != PE && *PI == SrcPred) {
- if (++PI != PE) return 0; // Not a single pred.
- return SrcPred; // Otherwise, it's an "if then" situation. Return the if.
- }
-
- // Otherwise, this is something we can't handle.
- return 0;
-}
-
-
-/// eliminateUnconditionalBranch - Clone the instructions from the destination
-/// block into the source block, eliminating the specified unconditional branch.
-/// If the destination block defines values used by successors of the dest
-/// block, we may need to insert PHI nodes.
-///
-void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
- BasicBlock *SourceBlock = Branch->getParent();
- BasicBlock *DestBlock = Branch->getSuccessor(0);
- assert(SourceBlock != DestBlock && "Our predicate is broken!");
-
- DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName()
- << "]: Eliminating branch: " << *Branch);
-
- // See if we can avoid duplicating code by moving it up to a dominator of both
- // blocks.
- if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
- DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n");
-
- // If there are non-phi instructions in DestBlock that have no operands
- // defined in DestBlock, and if the instruction has no side effects, we can
- // move the instruction to DomBlock instead of duplicating it.
- BasicBlock::iterator BBI = DestBlock->getFirstNonPHI();
- while (!isa<TerminatorInst>(BBI)) {
- Instruction *I = BBI++;
-
- bool CanHoist = I->isSafeToSpeculativelyExecute() &&
- !I->mayReadFromMemory();
- if (CanHoist) {
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
- if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
- if (OpI->getParent() == DestBlock ||
- (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) {
- CanHoist = false;
- break;
- }
- if (CanHoist) {
- // Remove from DestBlock, move right before the term in DomBlock.
- DestBlock->getInstList().remove(I);
- DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
- DEBUG(dbgs() << "Hoisted: " << *I);
- }
- }
- }
- }
-
- // Tail duplication can not update SSA properties correctly if the values
- // defined in the duplicated tail are used outside of the tail itself. For
- // this reason, we spill all values that are used outside of the tail to the
- // stack.
- for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
- if (I->isUsedOutsideOfBlock(DestBlock)) {
- // We found a use outside of the tail. Create a new stack slot to
- // break this inter-block usage pattern.
- DemoteRegToStack(*I);
- }
-
- // We are going to have to map operands from the original block B to the new
- // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI
- // nodes also define part of this mapping. Loop over these PHI nodes, adding
- // them to our mapping.
- //
- std::map<Value*, Value*> ValueMapping;
-
- BasicBlock::iterator BI = DestBlock->begin();
- bool HadPHINodes = isa<PHINode>(BI);
- for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);
-
- // Clone the non-phi instructions of the dest block into the source block,
- // keeping track of the mapping...
- //
- for (; BI != DestBlock->end(); ++BI) {
- Instruction *New = BI->clone();
- New->setName(BI->getName());
- SourceBlock->getInstList().push_back(New);
- ValueMapping[BI] = New;
- }
-
- // Now that we have built the mapping information and cloned all of the
- // instructions (giving us a new terminator, among other things), walk the new
- // instructions, rewriting references of old instructions to use new
- // instructions.
- //
- BI = Branch; ++BI; // Get an iterator to the first new instruction
- for (; BI != SourceBlock->end(); ++BI)
- for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
- std::map<Value*, Value*>::const_iterator I =
- ValueMapping.find(BI->getOperand(i));
- if (I != ValueMapping.end())
- BI->setOperand(i, I->second);
- }
-
- // Next we check to see if any of the successors of DestBlock had PHI nodes.
- // If so, we need to add entries to the PHI nodes for SourceBlock now.
- for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
- SI != SE; ++SI) {
- BasicBlock *Succ = *SI;
- for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
- PHINode *PN = cast<PHINode>(PNI);
- // Ok, we have a PHI node. Figure out what the incoming value was for the
- // DestBlock.
- Value *IV = PN->getIncomingValueForBlock(DestBlock);
-
- // Remap the value if necessary...
- std::map<Value*, Value*>::const_iterator I = ValueMapping.find(IV);
- if (I != ValueMapping.end())
- IV = I->second;
- PN->addIncoming(IV, SourceBlock);
- }
- }
-
- // Next, remove the old branch instruction, and any PHI node entries that we
- // had.
- BI = Branch; ++BI; // Get an iterator to the first new instruction
- DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
- SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch...
-
- // Final step: now that we have finished everything up, walk the cloned
- // instructions one last time, constant propagating and DCE'ing them, because
- // they may not be needed anymore.
- //
- if (HadPHINodes) {
- while (BI != SourceBlock->end()) {
- Instruction *Inst = BI++;
- if (isInstructionTriviallyDead(Inst))
- Inst->eraseFromParent();
- else if (Value *V = SimplifyInstruction(Inst)) {
- Inst->replaceAllUsesWith(V);
- Inst->eraseFromParent();
- }
- }
- }
-
- ++NumEliminated; // We just killed a branch!
-}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index b4f74f97e9..a7f9efd562 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -287,7 +287,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
///
BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
BasicBlock::iterator SplitIt = SplitPt;
- while (isa<PHINode>(SplitIt))
+ while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
++SplitIt;
BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
@@ -299,138 +299,114 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
// Old dominates New. New node dominates all other nodes dominated by Old.
- DomTreeNode *OldNode = DT->getNode(Old);
- std::vector<DomTreeNode *> Children;
- for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
- I != E; ++I)
- Children.push_back(*I);
+ if (DomTreeNode *OldNode = DT->getNode(Old)) {
+ std::vector<DomTreeNode *> Children;
+ for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
+ I != E; ++I)
+ Children.push_back(*I);
DomTreeNode *NewNode = DT->addNewBlock(New,Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
E = Children.end(); I != E; ++I)
DT->changeImmediateDominator(*I, NewNode);
+ }
}
return New;
}
+/// UpdateAnalysisInformation - Update DominatorTree, LoopInfo, and LCSSA
+/// analysis information.
+static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock *> Preds,
+ Pass *P, bool &HasLoopExit) {
+ if (!P) return;
-/// SplitBlockPredecessors - This method transforms BB by introducing a new
-/// basic block into the function, and moving some of the predecessors of BB to
-/// be predecessors of the new block. The new predecessors are indicated by the
-/// Preds array, which has NumPreds elements in it. The new block is given a
-/// suffix of 'Suffix'.
-///
-/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// LoopInfo, and LCCSA but no other analyses. In particular, it does not
-/// preserve LoopSimplify (because it's complicated to handle the case where one
-/// of the edges being split is an exit of a loop with other exits).
-///
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P) {
- // Create new basic block, insert right before the original block.
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
- BB->getParent(), BB);
-
- // The new block unconditionally branches to the old block.
- BranchInst *BI = BranchInst::Create(BB, NewBB);
-
- LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
- Loop *L = LI ? LI->getLoopFor(BB) : 0;
- bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+ LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
+ Loop *L = LI ? LI->getLoopFor(OldBB) : 0;
- // Move the edges from Preds to point to NewBB instead of BB.
- // While here, if we need to preserve loop analyses, collect
- // some information about how this split will affect loops.
- bool HasLoopExit = false;
+ // If we need to preserve loop analyses, collect some information about how
+ // this split will affect loops.
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
- for (unsigned i = 0; i != NumPreds; ++i) {
- // This is slightly more strict than necessary; the minimum requirement
- // is that there be no more than one indirectbr branching to BB. And
- // all BlockAddress uses would need to be updated.
- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
- "Cannot split an edge from an IndirectBrInst");
-
- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
-
- if (LI) {
- // If we need to preserve LCSSA, determine if any of
- // the preds is a loop exit.
+ if (LI) {
+ bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
+ for (ArrayRef<BasicBlock*>::iterator
+ i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+ BasicBlock *Pred = *i;
+
+ // If we need to preserve LCSSA, determine if any of the preds is a loop
+ // exit.
if (PreserveLCSSA)
- if (Loop *PL = LI->getLoopFor(Preds[i]))
- if (!PL->contains(BB))
+ if (Loop *PL = LI->getLoopFor(Pred))
+ if (!PL->contains(OldBB))
HasLoopExit = true;
- // If we need to preserve LoopInfo, note whether any of the
- // preds crosses an interesting loop boundary.
- if (L) {
- if (L->contains(Preds[i]))
- IsLoopEntry = false;
- else
- SplitMakesNewLoopHeader = true;
- }
+
+ // If we need to preserve LoopInfo, note whether any of the preds crosses
+ // an interesting loop boundary.
+ if (!L) continue;
+ if (L->contains(Pred))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
}
}
// Update dominator tree if available.
- DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
if (DT)
DT->splitBlock(NewBB);
- // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
- // node becomes an incoming value for BB's phi node. However, if the Preds
- // list is empty, we need to insert dummy entries into the PHI nodes in BB to
- // account for the newly created predecessor.
- if (NumPreds == 0) {
- // Insert dummy values as the incoming value.
- for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
- cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
- return NewBB;
+ if (!L) return;
+
+ if (IsLoopEntry) {
+ // Add the new block to the nearest enclosing loop (and not an adjacent
+ // loop). To find this, examine each of the predecessors and determine which
+ // loops enclose them, and select the most-nested loop which contains the
+ // loop containing the block being split.
+ Loop *InnermostPredLoop = 0;
+ for (ArrayRef<BasicBlock*>::iterator
+ i = Preds.begin(), e = Preds.end(); i != e; ++i) {
+ BasicBlock *Pred = *i;
+ if (Loop *PredLoop = LI->getLoopFor(Pred)) {
+ // Seek a loop which actually contains the block being split (to avoid
+ // adjacent loops).
+ while (PredLoop && !PredLoop->contains(OldBB))
+ PredLoop = PredLoop->getParentLoop();
+
+ // Select the most-nested of these loops which contains the block.
+ if (PredLoop && PredLoop->contains(OldBB) &&
+ (!InnermostPredLoop ||
+ InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
+ InnermostPredLoop = PredLoop;
+ }
+ }
+
+ if (InnermostPredLoop)
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ } else {
+ L->addBasicBlockToLoop(NewBB, LI->getBase());
+ if (SplitMakesNewLoopHeader)
+ L->moveToHeader(NewBB);
}
+}
+/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
+/// from NewBB. This also updates AliasAnalysis, if available.
+static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
+ ArrayRef<BasicBlock*> Preds, BranchInst *BI,
+ Pass *P, bool HasLoopExit) {
+ // Create a new PHI node in NewBB for each PHI node in OrigBB.
AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
-
- if (L) {
- if (IsLoopEntry) {
- // Add the new block to the nearest enclosing loop (and not an
- // adjacent loop). To find this, examine each of the predecessors and
- // determine which loops enclose them, and select the most-nested loop
- // which contains the loop containing the block being split.
- Loop *InnermostPredLoop = 0;
- for (unsigned i = 0; i != NumPreds; ++i)
- if (Loop *PredLoop = LI->getLoopFor(Preds[i])) {
- // Seek a loop which actually contains the block being split (to
- // avoid adjacent loops).
- while (PredLoop && !PredLoop->contains(BB))
- PredLoop = PredLoop->getParentLoop();
- // Select the most-nested of these loops which contains the block.
- if (PredLoop &&
- PredLoop->contains(BB) &&
- (!InnermostPredLoop ||
- InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
- InnermostPredLoop = PredLoop;
- }
- if (InnermostPredLoop)
- InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
- } else {
- L->addBasicBlockToLoop(NewBB, LI->getBase());
- if (SplitMakesNewLoopHeader)
- L->moveToHeader(NewBB);
- }
- }
-
- // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
- for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
+ for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I++);
-
+
// Check to see if all of the values coming in are the same. If so, we
// don't need to create a new PHI node, unless it's needed for LCSSA.
Value *InVal = 0;
if (!HasLoopExit) {
InVal = PN->getIncomingValueForBlock(Preds[0]);
- for (unsigned i = 1; i != NumPreds; ++i)
+ for (unsigned i = 1, e = Preds.size(); i != e; ++i)
if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
InVal = 0;
break;
@@ -441,31 +417,191 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
// PHI.
- for (unsigned i = 0; i != NumPreds; ++i)
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i)
PN->removeIncomingValue(Preds[i], false);
} else {
// If the values coming into the block are not the same, we need a PHI.
// Create the new PHI node, insert it into NewBB at the end of the block
PHINode *NewPHI =
- PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI);
+ PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
if (AA) AA->copyValue(PN, NewPHI);
// Move all of the PHI values for 'Preds' to the new PHI.
- for (unsigned i = 0; i != NumPreds; ++i) {
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
Value *V = PN->removeIncomingValue(Preds[i], false);
NewPHI->addIncoming(V, Preds[i]);
}
+
InVal = NewPHI;
}
-
+
// Add an incoming value to the PHI node in the loop for the preheader
// edge.
PN->addIncoming(InVal, NewBB);
}
+}
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block. The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it. The new block is given a
+/// suffix of 'Suffix'.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// LoopInfo, and LCSSA but no other analyses. In particular, it does not
+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
+///
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds, const char *Suffix,
+ Pass *P) {
+ // Create new basic block, insert right before the original block.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
+ BB->getParent(), BB);
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI = BranchInst::Create(BB, NewBB);
+
+ // Move the edges from Preds to point to NewBB instead of BB.
+ for (unsigned i = 0; i != NumPreds; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+ }
+
+ // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
+ // node becomes an incoming value for BB's phi node. However, if the Preds
+ // list is empty, we need to insert dummy entries into the PHI nodes in BB to
+ // account for the newly created predecessor.
+ if (NumPreds == 0) {
+ // Insert dummy values as the incoming value.
+ for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
+ cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+ return NewBB;
+ }
+
+ // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds),
+ P, HasLoopExit);
+
+ // Update the PHI nodes in BB with the values coming from NewBB.
+ UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI,
+ P, HasLoopExit);
return NewBB;
}
+/// SplitLandingPadPredecessors - This method transforms the landing pad,
+/// OrigBB, by introducing two new basic blocks into the function. One of those
+/// new basic blocks gets the predecessors listed in Preds. The other basic
+/// block gets the remaining predecessors of OrigBB. The landingpad instruction
+/// of OrigBB is cloned into both new basic blocks. The new blocks are given
+/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCSSA but no other analyses. In particular,
+/// it does not preserve LoopSimplify (because it's complicated to handle the
+/// case where one of the edges being split is an exit of a loop with other
+/// exits).
+///
+void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix1, const char *Suffix2,
+ Pass *P,
+ SmallVectorImpl<BasicBlock*> &NewBBs) {
+ assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
+
+ // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
+ // it right before the original block.
+ BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix1,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB1);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);
+
+ // Move the edges from Preds to point to NewBB1 instead of OrigBB.
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
+ }
+
+ // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ bool HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB1.
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit);
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ SmallVector<BasicBlock*, 8> NewBB2Preds;
+ for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB);
+ i != e; ) {
+ BasicBlock *Pred = *i++;
+ if (Pred == NewBB1) continue;
+ assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+ NewBB2Preds.push_back(Pred);
+ e = pred_end(OrigBB);
+ }
+
+ BasicBlock *NewBB2 = 0;
+ if (!NewBB2Preds.empty()) {
+ // Create another basic block for the rest of OrigBB's predecessors.
+ NewBB2 = BasicBlock::Create(OrigBB->getContext(),
+ OrigBB->getName() + Suffix2,
+ OrigBB->getParent(), OrigBB);
+ NewBBs.push_back(NewBB2);
+
+ // The new block unconditionally branches to the old block.
+ BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);
+
+ // Move the remaining edges from OrigBB to point to NewBB2.
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i)
+ (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);
+
+ // Update DominatorTree, LoopInfo, and LCSSA analysis information.
+ HasLoopExit = false;
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit);
+
+ // Update the PHI nodes in OrigBB with the values coming from NewBB2.
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit);
+ }
+
+ LandingPadInst *LPad = OrigBB->getLandingPadInst();
+ Instruction *Clone1 = LPad->clone();
+ Clone1->setName(Twine("lpad") + Suffix1);
+ NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1);
+
+ if (NewBB2) {
+ Instruction *Clone2 = LPad->clone();
+ Clone2->setName(Twine("lpad") + Suffix2);
+ NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
+
+ // Create a PHI node for the two cloned landingpad instructions.
+ PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+ PN->addIncoming(Clone1, NewBB1);
+ PN->addIncoming(Clone2, NewBB2);
+ LPad->replaceAllUsesWith(PN);
+ LPad->eraseFromParent();
+ } else {
+ // There is no second clone. Just replace the landing pad with the first
+ // clone.
+ LPad->replaceAllUsesWith(Clone1);
+ LPad->eraseFromParent();
+ }
+}
+
/// FindFunctionBackedges - Analyze the specified function to find all of the
/// loop backedges in the function and return them. This is a relatively cheap
/// (compared to computing dominators and loop info) analysis.
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 803e431f76..c052910881 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -102,7 +102,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
++I; // Skip one edge due to the incoming arc from TI.
if (!AllowIdenticalEdges)
return I != E;
-
+
// If AllowIdenticalEdges is true, then we allow this edge to be considered
// non-critical iff all preds come from TI's block.
while (I != E) {
@@ -155,10 +155,10 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
/// This returns the new block if the edge was split, null otherwise.
///
/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
-/// specified successor will be merged into the same critical edge block.
-/// This is most commonly interesting with switch instructions, which may
+/// specified successor will be merged into the same critical edge block.
+/// This is most commonly interesting with switch instructions, which may
/// have many edges to any one destination. This ensures that all edges to that
-/// dest go to one block instead of each going to a different block, but isn't
+/// dest go to one block instead of each going to a different block, but isn't
/// the standard definition of a "critical edge".
///
/// It is invalid to call this function on a critical edge that starts at an
@@ -167,15 +167,20 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
/// to.
///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
- Pass *P, bool MergeIdenticalEdges) {
+ Pass *P, bool MergeIdenticalEdges,
+ bool DontDeleteUselessPhis) {
if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
-
+
assert(!isa<IndirectBrInst>(TI) &&
"Cannot split critical edge from IndirectBrInst");
-
+
BasicBlock *TIBB = TI->getParent();
BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (DestBB->isLandingPad()) return 0;
+
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
@@ -190,7 +195,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
Function &F = *TIBB->getParent();
Function::iterator FBBI = TIBB;
F.getBasicBlockList().insert(++FBBI, NewBB);
-
+
// If there are any PHI nodes in DestBB, we need to update them so that they
// merge incoming values from NewBB instead of from TIBB.
{
@@ -207,35 +212,35 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// happens because the BB list of PHI nodes are usually in the same
// order.
if (PN->getIncomingBlock(BBIdx) != TIBB)
- BBIdx = PN->getBasicBlockIndex(TIBB);
+ BBIdx = PN->getBasicBlockIndex(TIBB);
PN->setIncomingBlock(BBIdx, NewBB);
}
}
-
+
// If there are any other edges from TIBB to DestBB, update those to go
// through the split block, making those edges non-critical as well (and
// reducing the number of phi entries in the DestBB if relevant).
if (MergeIdenticalEdges) {
for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
if (TI->getSuccessor(i) != DestBB) continue;
-
+
// Remove an entry for TIBB from DestBB phi nodes.
- DestBB->removePredecessor(TIBB);
-
+ DestBB->removePredecessor(TIBB, DontDeleteUselessPhis);
+
// We found another edge to DestBB, go to NewBB instead.
TI->setSuccessor(i, NewBB);
}
}
-
-
+
+
// If we don't have a pass object, we can't update anything...
if (P == 0) return NewBB;
-
+
DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
-
+
// If we have nothing to update, just return.
if (DT == 0 && LI == 0 && PI == 0)
return NewBB;
@@ -263,7 +268,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
bool NewBBDominatesDestBB = true;
-
+
// Should we update DominatorTree information?
if (DT) {
DomTreeNode *TINode = DT->getNode(TIBB);
@@ -274,7 +279,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (TINode) { // Don't break unreachable code!
DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
DomTreeNode *DestBBNode = 0;
-
+
// If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
if (!OtherPreds.empty()) {
DestBBNode = DT->getNode(DestBB);
@@ -285,7 +290,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
OtherPreds.clear();
}
-
+
// If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
// doesn't dominate anything.
if (NewBBDominatesDestBB) {
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 204c2c63e1..7adc5f1ac2 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -21,9 +21,17 @@ add_llvm_library(LLVMTransformUtils
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
SimplifyCFG.cpp
+ SimplifyIndVar.cpp
SimplifyInstructions.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
)
+add_llvm_library_dependencies(LLVMTransformUtils
+ LLVMAnalysis
+ LLVMCore
+ LLVMSupport
+ LLVMTarget
+ LLVMipa
+ )
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 6d6661e825..cf21f1ed97 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -331,12 +331,8 @@ ConstantFoldMappedInstruction(const Instruction *I) {
TD);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
- if (!LI->isVolatile() && CE->getOpcode() == Instruction::GetElementPtr)
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
- return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(),
- CE);
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(Ops[0], TD);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
}
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index a08fa35065..a0e027b5f1 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -50,10 +50,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
I != E; ++I) {
GlobalVariable *GV = new GlobalVariable(*New,
I->getType()->getElementType(),
- false,
- GlobalValue::ExternalLinkage, 0,
- I->getName());
- GV->setAlignment(I->getAlignment());
+ I->isConstant(), I->getLinkage(),
+ (Constant*) 0, I->getName(),
+ (GlobalVariable*) 0,
+ I->isThreadLocal(),
+ I->getType()->getAddressSpace());
+ GV->copyAttributesFrom(I);
VMap[I] = GV;
}
@@ -61,16 +63,19 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
Function *NF =
Function::Create(cast<FunctionType>(I->getType()->getElementType()),
- GlobalValue::ExternalLinkage, I->getName(), New);
+ I->getLinkage(), I->getName(), New);
NF->copyAttributesFrom(I);
VMap[I] = NF;
}
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
- I != E; ++I)
- VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
- I->getName(), NULL, New);
+ I != E; ++I) {
+ GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(),
+ I->getName(), NULL, New);
+ GA->copyAttributesFrom(I);
+ VMap[I] = GA;
+ }
// Now that all of the things that global variable initializer can refer to
// have been created, loop through and copy the global variable referrers
@@ -81,9 +86,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
GV->setInitializer(MapValue(I->getInitializer(), VMap));
- GV->setLinkage(I->getLinkage());
- GV->setThreadLocal(I->isThreadLocal());
- GV->setConstant(I->isConstant());
}
// Similarly, copy over function bodies now...
@@ -101,15 +103,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
}
-
- F->setLinkage(I->getLinkage());
}
// And aliases
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
- GA->setLinkage(I->getLinkage());
if (const Constant *C = I->getAliasee())
GA->setAliasee(MapValue(C, VMap));
}
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 8f8e3dc0b0..5f47ebb782 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -55,9 +55,9 @@ namespace {
CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false)
: DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {}
- Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code);
+ Function *ExtractCodeRegion(ArrayRef<BasicBlock*> code);
- bool isEligible(const std::vector<BasicBlock*> &code);
+ bool isEligible(ArrayRef<BasicBlock*> code);
private:
/// definedInRegion - Return true if the specified value is defined in the
@@ -317,8 +317,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
TerminatorInst *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP =
- GetElementPtrInst::Create(AI, Idx, Idx+2,
- "gep_" + inputs[i]->getName(), TI);
+ GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
} else
RewriteVal = AI++;
@@ -420,7 +419,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
GetElementPtrInst *GEP =
- GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ GetElementPtrInst::Create(Struct, Idx,
"gep_" + StructValues[i]->getName());
codeReplacer->getInstList().push_back(GEP);
StoreInst *SI = new StoreInst(StructValues[i], GEP);
@@ -446,7 +445,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
GetElementPtrInst *GEP
- = GetElementPtrInst::Create(Struct, Idx, Idx + 2,
+ = GetElementPtrInst::Create(Struct, Idx,
"gep_reload_" + outputs[i]->getName());
codeReplacer->getInstList().push_back(GEP);
Output = GEP;
@@ -561,7 +560,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
FirstOut+out);
GetElementPtrInst *GEP =
- GetElementPtrInst::Create(OAI, Idx, Idx + 2,
+ GetElementPtrInst::Create(OAI, Idx,
"gep_" + outputs[out]->getName(),
NTRet);
new StoreInst(outputs[out], GEP, NTRet);
@@ -655,7 +654,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
/// computed result back into memory.
///
Function *CodeExtractor::
-ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
+ExtractCodeRegion(ArrayRef<BasicBlock*> code) {
if (!isEligible(code))
return 0;
@@ -755,9 +754,13 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
return newFunction;
}
-bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
+bool CodeExtractor::isEligible(ArrayRef<BasicBlock*> code) {
+ // Deny a single basic block that's a landing pad block.
+ if (code.size() == 1 && code[0]->isLandingPad())
+ return false;
+
// Deny code region if it contains allocas or vastarts.
- for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end();
+ for (ArrayRef<BasicBlock*>::iterator BB = code.begin(), e=code.end();
BB != e; ++BB)
for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end();
I != Ie; ++I)
@@ -771,25 +774,23 @@ bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
}
-/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new
-/// function
+/// ExtractCodeRegion - Slurp a sequence of basic blocks into a brand new
+/// function.
///
Function* llvm::ExtractCodeRegion(DominatorTree &DT,
- const std::vector<BasicBlock*> &code,
+ ArrayRef<BasicBlock*> code,
bool AggregateArgs) {
return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code);
}
-/// ExtractBasicBlock - slurp a natural loop into a brand new function
+/// ExtractLoop - Slurp a natural loop into a brand new function.
///
Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks());
}
-/// ExtractBasicBlock - slurp a basic block into a brand new function
+/// ExtractBasicBlock - Slurp a basic block into a brand new function.
///
-Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) {
- std::vector<BasicBlock*> Blocks;
- Blocks.push_back(BB);
- return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks);
+Function* llvm::ExtractBasicBlock(ArrayRef<BasicBlock*> BBs, bool AggregateArgs){
+ return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(BBs);
}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 714b12c7d2..5464dbc4a8 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -45,6 +45,9 @@ bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
return InlineFunction(CallSite(II), IFI);
}
+// FIXME: New EH - Remove the functions marked [LIBUNWIND] when new EH is
+// turned on.
+
/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block.
static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) {
for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) {
@@ -250,20 +253,32 @@ namespace {
PHINode *InnerSelectorPHI;
SmallVector<Value*, 8> UnwindDestPHIValues;
+ // FIXME: New EH - These will replace the analogous ones above.
+ BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
+ BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
+ LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke.
+ PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts.
+
public:
- InvokeInliningInfo(InvokeInst *II) :
- OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
- InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0) {
-
- // If there are PHI nodes in the unwind destination block, we
- // need to keep track of which values came into them from the
- // invoke before removing the edge from this block.
- llvm::BasicBlock *invokeBB = II->getParent();
- for (BasicBlock::iterator I = OuterUnwindDest->begin();
- isa<PHINode>(I); ++I) {
+ InvokeInliningInfo(InvokeInst *II)
+ : OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
+ InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0),
+ OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
+ CallerLPad(0), InnerEHValuesPHI(0) {
+ // If there are PHI nodes in the unwind destination block, we need to keep
+ // track of which values came into them from the invoke before removing
+ // the edge from this block.
+ llvm::BasicBlock *InvokeBB = II->getParent();
+ BasicBlock::iterator I = OuterUnwindDest->begin();
+ for (; isa<PHINode>(I); ++I) {
// Save the value to use for this edge.
- PHINode *phi = cast<PHINode>(I);
- UnwindDestPHIValues.push_back(phi->getIncomingValueForBlock(invokeBB));
+ PHINode *PHI = cast<PHINode>(I);
+ UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ }
+
+ // FIXME: With the new EH, this if/dyn_cast should be a 'cast'.
+ if (LandingPadInst *LPI = dyn_cast<LandingPadInst>(I)) {
+ CallerLPad = LPI;
}
}
@@ -281,11 +296,23 @@ namespace {
BasicBlock *getInnerUnwindDest();
+ // FIXME: New EH - Rename when new EH is turned on.
+ BasicBlock *getInnerUnwindDestNewEH();
+
+ LandingPadInst *getLandingPadInst() const { return CallerLPad; }
+
bool forwardEHResume(CallInst *call, BasicBlock *src);
- /// Add incoming-PHI values to the unwind destination block for
- /// the given basic block, using the values for the original
- /// invoke's source block.
+ /// forwardResume - Forward the 'resume' instruction to the caller's landing
+ /// pad block. When the landing pad block has only one predecessor, this is
+ /// a simple branch. When there is more than one predecessor, we need to
+ /// split the landing pad block after the landingpad instruction and jump
+ /// to there.
+ void forwardResume(ResumeInst *RI);
+
+ /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
+ /// destination block for the given basic block, using the values for the
+ /// original invoke's source block.
void addIncomingPHIValuesFor(BasicBlock *BB) const {
addIncomingPHIValuesForInto(BB, OuterUnwindDest);
}
@@ -300,7 +327,7 @@ namespace {
};
}
-/// Get or create a target for the branch out of rewritten calls to
+/// [LIBUNWIND] Get or create a target for the branch out of rewritten calls to
/// llvm.eh.resume.
BasicBlock *InvokeInliningInfo::getInnerUnwindDest() {
if (InnerUnwindDest) return InnerUnwindDest;
@@ -404,6 +431,60 @@ bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) {
return true;
}
+/// Get or create a target for the branch from ResumeInsts.
+BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() {
+ // FIXME: New EH - rename this function when new EH is turned on.
+ if (InnerResumeDest) return InnerResumeDest;
+
+ // Split the landing pad.
+ BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint;
+ InnerResumeDest =
+ OuterResumeDest->splitBasicBlock(SplitPoint,
+ OuterResumeDest->getName() + ".body");
+
+ // The number of incoming edges we expect to the inner landing pad.
+ const unsigned PHICapacity = 2;
+
+ // Create corresponding new PHIs for all the PHIs in the outer landing pad.
+ BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
+ BasicBlock::iterator I = OuterResumeDest->begin();
+ for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
+ PHINode *OuterPHI = cast<PHINode>(I);
+ PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
+ OuterPHI->getName() + ".lpad-body",
+ InsertPoint);
+ OuterPHI->replaceAllUsesWith(InnerPHI);
+ InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
+ }
+
+ // Create a PHI for the exception values.
+ InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity,
+ "eh.lpad-body", InsertPoint);
+ CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
+ InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);
+
+ // All done.
+ return InnerResumeDest;
+}
+
+/// forwardResume - Forward the 'resume' instruction to the caller's landing pad
+/// block. When the landing pad block has only one predecessor, this is a simple
+/// branch. When there is more than one predecessor, we need to split the
+/// landing pad block after the landingpad instruction and jump to there.
+void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+ BasicBlock *Dest = getInnerUnwindDestNewEH();
+ BasicBlock *Src = RI->getParent();
+
+ BranchInst::Create(Dest, Src);
+
+ // Update the PHIs in the destination. They were inserted in an order which
+ // makes this work.
+ addIncomingPHIValuesForInto(Src, Dest);
+
+ InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
+ RI->eraseFromParent();
+}
+
/// [LIBUNWIND] Check whether this selector is "only cleanups":
/// call i32 @llvm.eh.selector(blah, blah, i32 0)
static bool isCleanupOnlySelector(EHSelectorInst *selector) {
@@ -421,9 +502,19 @@ static bool isCleanupOnlySelector(EHSelectorInst *selector) {
/// Returns true to indicate that the next block should be skipped.
static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
InvokeInliningInfo &Invoke) {
+ LandingPadInst *LPI = Invoke.getLandingPadInst();
+
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *I = BBI++;
-
+
+ if (LPI) // FIXME: New EH - This won't be NULL in the new EH.
+ if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
+ unsigned NumClauses = LPI->getNumClauses();
+ L->reserveClauses(NumClauses);
+ for (unsigned i = 0; i != NumClauses; ++i)
+ L->addClause(LPI->getClause(i));
+ }
+
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
@@ -557,6 +648,10 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// there is now a new entry in them.
Invoke.addIncomingPHIValuesFor(BB);
}
+
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ Invoke.forwardResume(RI);
+ }
}
// Now that everything is happy, we have one final detail. The PHI nodes in
@@ -829,6 +924,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
return false;
}
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ for (Function::const_iterator
+ I = Caller->begin(), E = Caller->end(); I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'isa' here should go away once the new EH system is
+ // in place.
+ if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
+ continue;
+ const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
+ const Value *CallerPersFn = LP->getPersonalityFn();
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ for (Function::const_iterator
+ I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
+ // the new EH system is in place.
+ if (const LandingPadInst *LP =
+ dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+ if (CallerPersFn != LP->getPersonalityFn())
+ return false;
+ break;
+ }
+
+ break;
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
//
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 0f6d9ae99d..7034feb227 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -227,13 +226,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
bool llvm::isInstructionTriviallyDead(Instruction *I) {
if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+ // We don't want the landingpad instruction removed by anything this general.
+ if (isa<LandingPadInst>(I))
+ return false;
+
// We don't want debug info removed by anything this general, unless
// debug info is empty.
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
- if (DDI->getAddress())
+ if (DDI->getAddress())
return false;
return true;
- }
+ }
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
if (DVI->getValue())
return false;
@@ -244,10 +247,16 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
// Special case intrinsics that "may have side effects" but can be deleted
// when dead.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
// Safe to delete llvm.stacksave if dead.
if (II->getIntrinsicID() == Intrinsic::stacksave)
return true;
+
+ // Lifetime intrinsics are dead when their second (pointer) argument is undef.
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ return isa<UndefValue>(II->getArgOperand(1));
+ }
return false;
}
@@ -712,10 +721,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// their preferred alignment from the beginning.
///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
- unsigned PrefAlign) {
+ unsigned PrefAlign, const TargetData *TD) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // If the preferred alignment is greater than the natural stack alignment
+ // then don't round up. This avoids dynamic stack realignment.
+ if (TD && TD->exceedsNaturalStackAlignment(PrefAlign))
+ return Align;
// If there is a requested alignment and if this is an alloca, round up.
if (AI->getAlignment() >= PrefAlign)
return AI->getAlignment();
@@ -766,7 +779,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
Align = std::min(Align, +Value::MaximumAlignment);
if (PrefAlign > Align)
- Align = enforceKnownAlignment(V, Align, PrefAlign);
+ Align = enforceKnownAlignment(V, Align, PrefAlign, TD);
// We don't need to make any adjustment.
return Align;
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index e79fb5ac21..cbd54a8dcb 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -213,7 +213,7 @@ ReprocessLoop:
// predecessors from outside of the loop, split the edge now.
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getExitBlocks(ExitBlocks);
-
+
SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
@@ -325,6 +325,14 @@ ReprocessLoop:
DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
<< ExitingBlock->getName() << "\n");
+ // If any reachable control flow within this loop has changed, notify
+ // ScalarEvolution. Currently assume the parent loop doesn't change
+ // (splitting edges doesn't count). If blocks, CFG edges, or other values
+ // in the parent loop change, then we need to call forgetLoop() for the
+ // parent instead.
+ if (SE)
+ SE->forgetLoop(L);
+
assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
Changed = true;
LI->removeBlock(ExitingBlock);
@@ -402,13 +410,24 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
}
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
- BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
- LoopBlocks.size(), ".loopexit",
- this);
+ BasicBlock *NewExitBB = 0;
+
+ if (Exit->isLandingPad()) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0],
+ LoopBlocks.size()),
+ ".loopexit", ".nonloopexit",
+ this, NewBBs);
+ NewExitBB = NewBBs[0];
+ } else {
+ NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
+ LoopBlocks.size(), ".loopexit",
+ this);
+ }
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
- << NewBB->getName() << "\n");
- return NewBB;
+ << NewExitBB->getName() << "\n");
+ return NewExitBB;
}
/// AddBlockAndPredsToSet - Add the specified block, and all of its
@@ -467,23 +486,23 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
if (&*BBI == SplitPreds[i])
return;
}
-
+
// If it isn't already after an outside block, move it after one. This is
// always good as it makes the uncond branch from the outside block into a
// fall-through.
-
+
// Figure out *which* outside block to put this after. Prefer an outside
// block that neighbors a BB actually in the loop.
BasicBlock *FoundBB = 0;
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
Function::iterator BBI = SplitPreds[i];
- if (++BBI != NewBB->getParent()->end() &&
+ if (++BBI != NewBB->getParent()->end() &&
L->contains(BBI)) {
FoundBB = SplitPreds[i];
break;
}
}
-
+
// If our heuristic for a *good* bb to place this after doesn't find
// anything, just pick something. It's likely better than leaving it within
// the loop.
@@ -544,7 +563,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
-
+
// Create the new outer loop.
Loop *NewOuter = new Loop();
@@ -735,6 +754,7 @@ void LoopSimplify::verifyAnalysis() const {
}
assert(HasIndBrPred &&
"LoopSimplify has no excuse for missing loop header info!");
+ (void)HasIndBrPred;
}
// Indirectbr can interfere with exit block canonicalization.
@@ -742,12 +762,15 @@ void LoopSimplify::verifyAnalysis() const {
bool HasIndBrExiting = false;
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i)
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
HasIndBrExiting = true;
break;
}
+ }
+
assert(HasIndBrExiting &&
"LoopSimplify has no excuse for missing exit block info!");
+ (void)HasIndBrExiting;
}
}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 6772511b5d..62e4fa2953 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -11,9 +11,6 @@
// actual pass or policy, but provides a single function to perform loop
// unrolling.
//
-// It works best when loops have been canonicalized by the -indvars pass,
-// allowing it to determine the trip counts of loops easily.
-//
// The process of unrolling can produce extraneous basic blocks linked with
// unconditional branches. This will be corrected in the future.
//
@@ -24,6 +21,7 @@
#include "llvm/BasicBlock.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/Debug.h"
@@ -31,6 +29,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
// TODO: Should these be here or in LoopUnroll?
@@ -61,7 +60,8 @@ static inline void RemapInstruction(Instruction *I,
/// only has one predecessor, and that predecessor only has one successor.
/// The LoopInfo Analysis that is passed will be kept consistent.
/// Returns the new combined block.
-static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
+static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
+ LPPassManager *LPM) {
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
@@ -93,6 +93,12 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
std::string OldName = BB->getName();
// Erase basic block from the function...
+
+ // ScalarEvolution holds references to loop exit blocks.
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) {
+ if (Loop *L = LI->getLoopFor(BB))
+ SE->forgetLoop(L);
+ }
LI->removeBlock(BB);
BB->eraseFromParent();
@@ -109,12 +115,27 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
+/// TripCount is generally defined as the number of times the loop header
+/// executes. UnrollLoop relaxes the definition to permit early exits: here
+/// TripCount is the iteration on which control exits LatchBlock if no early
+/// exits were taken. Note that UnrollLoop assumes that the loop counter test
+/// terminates LatchBlock in order to remove unnecessary instances of the
+/// test. In other words, control may exit the loop prior to TripCount
+/// iterations via an early branch, but control may not exit the loop from the
+/// LatchBlock's terminator prior to TripCount iterations.
+///
+/// Similarly, TripMultiple divides the number of times that the LatchBlock may
+/// execute without exiting the loop.
+///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
-bool llvm::UnrollLoop(Loop *L, unsigned Count,
- LoopInfo *LI, LPPassManager *LPM) {
+///
+/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
+/// available it must also preserve those analyses.
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+ unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -129,14 +150,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
BasicBlock *Header = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-
+
if (!BI || BI->isUnconditional()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
DEBUG(dbgs() <<
" Can't unroll; loop not terminated by a conditional branch.\n");
return false;
}
-
+
if (Header->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
DEBUG(dbgs() <<
@@ -146,16 +167,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
- if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>())
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
SE->forgetLoop(L);
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
- // Find trip multiple if count is not available
- unsigned TripMultiple = 1;
- if (TripCount == 0)
- TripMultiple = L->getSmallConstantTripMultiple();
-
if (TripCount != 0)
DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
@@ -208,12 +223,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
ValueToValueMapTy LastValueMap;
std::vector<PHINode*> OrigPHINode;
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- OrigPHINode.push_back(PN);
- if (Instruction *I =
- dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
- if (L->contains(I))
- LastValueMap[I] = I;
+ OrigPHINode.push_back(cast<PHINode>(I));
}
std::vector<BasicBlock*> Headers;
@@ -221,11 +231,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
Headers.push_back(Header);
Latches.push_back(LatchBlock);
+ // The current on-the-fly SSA update requires blocks to be processed in
+ // reverse postorder so that LastValueMap contains the correct value at each
+ // exit.
+ LoopBlocksDFS DFS(L);
+ DFS.perform(LI);
+
+ // Stash the DFS iterators before adding blocks to the loop.
+ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
+
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
-
- for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
- E = LoopBlocks.end(); BB != E; ++BB) {
+
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
ValueToValueMapTy VMap;
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
Header->getParent()->getBasicBlockList().push_back(New);
@@ -251,75 +270,55 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
L->addBasicBlockToLoop(New, LI->getBase());
- // Add phi entries for newly created values to all exit blocks except
- // the successor of the latch block. The successor of the exit block will
- // be updated specially after unrolling all the way.
- if (*BB != LatchBlock)
- for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
- ++SI)
- if (!L->contains(*SI))
- for (BasicBlock::iterator BBI = (*SI)->begin();
- PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
- Value *Incoming = phi->getIncomingValueForBlock(*BB);
- phi->addIncoming(Incoming, New);
- }
-
+ // Add phi entries for newly created values to all exit blocks.
+ for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
+ SI != SE; ++SI) {
+ if (L->contains(*SI))
+ continue;
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
+ if (It != LastValueMap.end())
+ Incoming = It->second;
+ phi->addIncoming(Incoming, New);
+ }
+ }
// Keep track of new headers and latches as we create them, so that
// we can insert the proper branches later.
if (*BB == Header)
Headers.push_back(New);
- if (*BB == LatchBlock) {
+ if (*BB == LatchBlock)
Latches.push_back(New);
- // Also, clear out the new latch's back edge so that it doesn't look
- // like a new loop, so that it's amenable to being merged with adjacent
- // blocks later on.
- TerminatorInst *Term = New->getTerminator();
- assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
- assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
- Term->setSuccessor(!ContinueOnTrue, NULL);
- }
-
NewBlocks.push_back(New);
}
-
+
// Remap all instructions in the most recent iteration
for (unsigned i = 0; i < NewBlocks.size(); ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
::RemapInstruction(I, LastValueMap);
}
-
- // The latch block exits the loop. If there are any PHI nodes in the
- // successor blocks, update them to use the appropriate values computed as the
- // last iteration of the loop.
- if (Count != 1) {
- BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
- for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
- SI != SE; ++SI) {
- for (BasicBlock::iterator BBI = (*SI)->begin();
- PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
- Value *InVal = PN->removeIncomingValue(LatchBlock, false);
- // If this value was defined in the loop, take the value defined by the
- // last iteration of the loop.
- if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
- if (L->contains(InValI))
- InVal = LastValueMap[InVal];
- }
- PN->addIncoming(InVal, LastIterationBB);
- }
- }
- }
- // Now, if we're doing complete unrolling, loop over the PHI nodes in the
- // original block, setting them to their incoming values.
- if (CompletelyUnroll) {
- BasicBlock *Preheader = L->getLoopPreheader();
- for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
- PHINode *PN = OrigPHINode[i];
+ // Loop over the PHI nodes in the original block, setting incoming values.
+ for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
+ PHINode *PN = OrigPHINode[i];
+ if (CompletelyUnroll) {
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
Header->getInstList().erase(PN);
}
+ else if (Count > 1) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
+ PN->addIncoming(InVal, Latches.back());
+ }
}
// Now that all the basic blocks for the unrolled iterations are in place,
@@ -351,6 +350,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// iteration.
Term->setSuccessor(!ContinueOnTrue, Dest);
} else {
+ // Remove phi operands at this loop exit
+ if (Dest != LoopExit) {
+ BasicBlock *BB = Latches[i];
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+ if (*SI == Headers[i])
+ continue;
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Phi->removeIncomingValue(BB, false);
+ }
+ }
+ }
// Replace the conditional branch with an unconditional one.
BranchInst::Create(Dest, Term);
Term->eraseFromParent();
@@ -362,11 +374,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
if (Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI))
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
}
}
-
+
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ // Incrementally updating domtree after loop unrolling would be easy.
+ if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
+ DT->runOnFunction(*L->getHeader()->getParent());
+
+ // Simplify any new induction variables in the partially unrolled loop.
+ if (SE && !CompletelyUnroll) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ simplifyLoopIVs(L, SE, LPM, DeadInsts);
+
+ // Aggressively clean up dead instructions that simplifyLoopIVs already
+ // identified. Any remaining should be cleaned up below.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+ }
+
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
// go.
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 61ab3f6533..9fdc06a713 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -1,3 +1,16 @@
+//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the 'expect' intrinsic to LLVM metadata.
+//
+//===----------------------------------------------------------------------===//
+
#define DEBUG_TYPE "lower-expect-intrinsic"
#include "llvm/Constants.h"
#include "llvm/Function.h"
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 8b5891f329..c96c8fce7b 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -127,7 +127,7 @@ bool LowerInvoke::doInitialization(Module &M) {
JBSize = JBSize ? JBSize : 200;
Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
- JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
+ JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty");
Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
JBLinkTy->setBody(Elts);
@@ -240,14 +240,14 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
- BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();
+ BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt();
// nonvolatile.
new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
InvokeNum, false, NI);
- Instruction* StackPtrLoad = new LoadInst(StackPtr, "stackptr.restore", true,
- II->getUnwindDest()->getFirstNonPHI()
- );
+ Instruction* StackPtrLoad =
+ new LoadInst(StackPtr, "stackptr.restore", true,
+ II->getUnwindDest()->getFirstInsertionPt());
CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
// Add a switch case to our unwind block.
@@ -406,6 +406,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
SmallVector<ReturnInst*,16> Returns;
SmallVector<UnwindInst*,16> Unwinds;
SmallVector<InvokeInst*,16> Invokes;
+ UnreachableInst* UnreachablePlaceholder = 0;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
@@ -455,8 +456,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
- OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
- "OldBuf",
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf",
EntryBB->getTerminator());
// Copy the JBListHead to the alloca.
@@ -487,9 +487,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Insert a load in the Catch block, and a switch on its value. By default,
// we go to a block that just does an unwind (which is the correct action
- // for a standard call).
+ // for a standard call). We insert an unreachable instruction here and
+ // modify the block to jump to the correct unwinding pad later.
BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
- Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB));
+ UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB);
Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
SwitchInst *CatchSwitch =
@@ -502,8 +503,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
"setjmp.cont");
Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
- Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
- "TheJmpBuf",
+ Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf",
EntryBB->getTerminator());
JmpBufPtr = new BitCastInst(JmpBufPtr,
Type::getInt8PtrTy(F.getContext()),
@@ -557,8 +557,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Get a pointer to the jmpbuf and longjmp.
Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
- Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf",
- UnwindBlock);
+ Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock);
Idx[0] = new BitCastInst(Idx[0],
Type::getInt8PtrTy(F.getContext()),
"tmp", UnwindBlock);
@@ -580,6 +579,12 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
Unwinds[i]->eraseFromParent();
}
+ // Replace the inserted unreachable with a branch to the unwind handler.
+ if (UnreachablePlaceholder) {
+ BranchInst::Create(UnwindHandler, UnreachablePlaceholder);
+ UnreachablePlaceholder->eraseFromParent();
+ }
+
// Finally, for any returns from this function, if this function contains an
// invoke, restore the old jmpbuf pointer to its input value.
if (OldJmpBufPtr) {
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index ed733d393a..686178ca01 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -277,11 +277,11 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
BasicBlock *CurBlock = SI->getParent();
BasicBlock *OrigBlock = CurBlock;
Function *F = CurBlock->getParent();
- Value *Val = SI->getOperand(0); // The value we are switching on...
+ Value *Val = SI->getCondition(); // The value we are switching on...
BasicBlock* Default = SI->getDefaultDest();
// If there is only the default destination, don't bother with the code below.
- if (SI->getNumOperands() == 2) {
+ if (SI->getNumCases() == 1) {
BranchInst::Create(SI->getDefaultDest(), CurBlock);
CurBlock->getInstList().erase(SI);
return;
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index e5a00f4e97..db3e942513 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -86,11 +86,15 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
UI != UE; ++UI) { // Loop over all of the uses of the alloca
const User *U = *UI;
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Note that atomic loads can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
if (LI->isVolatile())
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == AI)
return false; // Don't allow a store OF the AI, only INTO the AI.
+ // Note that atomic stores can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
if (SI->isVolatile())
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 52145b9c97..b8c3ab4c60 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -63,6 +63,7 @@ class SimplifyCFGOpt {
bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
IRBuilder<> &Builder);
+ bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder);
bool SimplifyUnreachable(UnreachableInst *UI);
@@ -2138,6 +2139,52 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD,
return true;
}
+bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
+ // If this is a trivial landing pad that just continues unwinding the caught
+ // exception then zap the landing pad, turning its invokes into calls.
+ BasicBlock *BB = RI->getParent();
+ LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
+ if (RI->getValue() != LPInst)
+ // Not a landing pad, or the resume is not unwinding the exception that
+ // caused control to branch here.
+ return false;
+
+ // Check that there are no other instructions except for debug intrinsics.
+ BasicBlock::iterator I = LPInst, E = RI;
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ // Turn all invokes that unwind here into calls and delete the basic block.
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
+ InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ // Insert a call instruction before the invoke.
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
+ Call->takeName(II);
+ Call->setCallingConv(II->getCallingConv());
+ Call->setAttributes(II->getAttributes());
+ Call->setDebugLoc(II->getDebugLoc());
+
+ // Anything that used the value produced by the invoke instruction now uses
+ // the value produced by the call instruction. Note that we do this even
+ // for void functions and calls with no uses so that the callgraph edge is
+ // updated.
+ II->replaceAllUsesWith(Call);
+ BB->removePredecessor(II->getParent());
+
+ // Insert a branch to the normal destination right before the invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Finally, delete the invoke instruction!
+ II->eraseFromParent();
+ }
+
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ return true;
+}
+
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
BasicBlock *BB = RI->getParent();
if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
@@ -2244,18 +2291,34 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
while (UI != BB->begin()) {
BasicBlock::iterator BBI = UI;
--BBI;
- // Do not delete instructions that can have side effects, like calls
- // (which may never return) and volatile loads and stores.
+ // Do not delete instructions that can have side effects which might cause
+ // the unreachable to not be reachable; specifically, calls and volatile
+ // operations may have this effect.
if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
-
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
- if (SI->isVolatile())
- break;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
- if (LI->isVolatile())
+
+ if (BBI->mayHaveSideEffects()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->isVolatile())
+ break;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->isVolatile())
+ break;
+ } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
+ if (RMWI->isVolatile())
+ break;
+ } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
+ if (CXI->isVolatile())
+ break;
+ } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
+ !isa<LandingPadInst>(BBI)) {
break;
-
+ }
+ // Note that deleting LandingPad's here is in fact okay, although it
+ // involves a bit of subtle reasoning. If this inst is a LandingPad,
+ // all the predecessors of this block will be the unwind edges of Invokes,
+ // and we can therefore guarantee this block will be erased.
+ }
+
// Delete this instruction (any uses are guaranteed to be dead)
if (!BBI->use_empty())
BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
@@ -2707,6 +2770,71 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return false;
}
+/// Check if passing a value to an instruction will cause undefined behavior.
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (!I->hasOneUse()) // Only look at single-use instructions, for compile time
+ return false;
+
+ if (C->isNullValue()) {
+ Instruction *Use = I->use_back();
+
+ // Now make sure that there are no instructions in between that can alter
+ // control flow (e.g. calls)
+ for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i)
+ if (i == I->getParent()->end() || i->mayHaveSideEffects())
+ return false;
+
+ // Look through GEPs. A load from a GEP derived from NULL is still undefined
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
+ if (GEP->getPointerOperand() == I)
+ return passingValueIsAlwaysUndefined(V, GEP);
+
+ // Look through bitcasts.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
+ return passingValueIsAlwaysUndefined(V, BC);
+
+ // Load from null is undefined.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Use))
+ return LI->getPointerAddressSpace() == 0;
+
+ // Store to null is undefined.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Use))
+ return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+ }
+ return false;
+}
+
+/// If BB has an incoming value that will always trigger undefined behavior
+/// (e.g. null pointer dereference), remove the branch leading here.
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
+ for (BasicBlock::iterator i = BB->begin();
+ PHINode *PHI = dyn_cast<PHINode>(i); ++i)
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) {
+ TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator();
+ IRBuilder<> Builder(T);
+ if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ BB->removePredecessor(PHI->getIncomingBlock(i));
+ // Turn unconditional branches into unreachables and remove the dead
+ // destination from conditional branches.
+ if (BI->isUnconditional())
+ Builder.CreateUnreachable();
+ else
+ Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) :
+ BI->getSuccessor(0));
+ BI->eraseFromParent();
+ return true;
+ }
+ // TODO: SwitchInst.
+ }
+
+ return false;
+}
+
bool SimplifyCFGOpt::run(BasicBlock *BB) {
bool Changed = false;
@@ -2730,6 +2858,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// Check for and eliminate duplicate PHI nodes in this block.
Changed |= EliminateDuplicatePHINodes(BB);
+ // Check for and remove branches that will always cause undefined behavior.
+ Changed |= removeUndefIntroducingPredecessor(BB);
+
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
@@ -2752,6 +2883,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
} else {
if (SimplifyCondBranch(BI, Builder)) return true;
}
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ if (SimplifyResume(RI, Builder)) return true;
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
if (SimplifyReturn(RI, Builder)) return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
new file mode 100644
index 0000000000..76289c055b
--- /dev/null
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -0,0 +1,432 @@
+//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements induction variable simplification. It does
+// not define any actual pass or policy, but provides a single function to
+// simplify a loop's induction variables based on ScalarEvolution.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "indvars"
+
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+
+namespace {
+ /// SimplifyIndvar - This is a utility for simplifying induction variables
+ /// based on ScalarEvolution. It is the primary instrument of the
+ /// IndvarSimplify pass, but it may also be directly invoked to cleanup after
+ /// other loop passes that preserve SCEV.
+ class SimplifyIndvar {
+ Loop *L;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ IVUsers *IU; // NULL for DisableIVRewrite
+ const TargetData *TD; // May be NULL
+
+ SmallVectorImpl<WeakVH> &DeadInsts;
+
+ bool Changed;
+
+ public:
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) :
+ L(Loop),
+ LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
+ SE(SE),
+ IU(IVU),
+ TD(LPM->getAnalysisIfAvailable<TargetData>()),
+ DeadInsts(Dead),
+ Changed(false) {
+ assert(LI && "IV simplification requires LoopInfo");
+ }
+
+ bool hasChanged() const { return Changed; }
+
+ /// Iteratively perform simplification on a worklist of users of the
+ /// specified induction variable. This is the top-level driver that applies
+ /// all simplifications to users of an IV.
+ void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL);
+
+ Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
+
+ bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
+ void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
+ void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+ bool IsSigned);
+ };
+}
+
+/// foldIVUser - Fold an IV operand into its use. This removes increments of an
+/// aligned IV when used by an instruction that ignores the low bits.
+///
+/// IVOperand is guaranteed SCEVable, but UseInst may not be.
+///
+/// Return the operand of IVOperand for this induction variable if IVOperand can
+/// be folded (in case more folding opportunities have been exposed).
+/// Otherwise return null.
+Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
+ Value *IVSrc = 0;
+ unsigned OperIdx = 0;
+ const SCEV *FoldedExpr = 0;
+ switch (UseInst->getOpcode()) {
+ default:
+ return 0;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ // We're only interested in the case where we know something about
+ // the numerator and have a constant denominator.
+ if (IVOperand != UseInst->getOperand(OperIdx) ||
+ !isa<ConstantInt>(UseInst->getOperand(1)))
+ return 0;
+
+ // Attempt to fold a binary operator with constant operand.
+ // e.g. ((I + 1) >> 2) => I >> 2
+ if (IVOperand->getNumOperands() != 2 ||
+ !isa<ConstantInt>(IVOperand->getOperand(1)))
+ return 0;
+
+ IVSrc = IVOperand->getOperand(0);
+ // IVSrc must be the (SCEVable) IV, since the other operand is const.
+ assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
+
+ ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
+ if (UseInst->getOpcode() == Instruction::LShr) {
+ // Get a constant for the divisor. See createSCEV.
+ uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
+ if (D->getValue().uge(BitWidth))
+ return 0;
+
+ D = ConstantInt::get(UseInst->getContext(),
+ APInt(BitWidth, 1).shl(D->getZExtValue()));
+ }
+ FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+ }
+ // We have something that might fold its operand. Compare SCEVs.
+ if (!SE->isSCEVable(UseInst->getType()))
+ return 0;
+
+ // Bypass the operand if SCEV can prove it has no effect.
+ if (SE->getSCEV(UseInst) != FoldedExpr)
+ return 0;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
+ << " -> " << *UseInst << '\n');
+
+ UseInst->setOperand(OperIdx, IVSrc);
+ assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+
+ ++NumElimOperand;
+ Changed = true;
+ if (IVOperand->use_empty())
+ DeadInsts.push_back(IVOperand);
+ return IVSrc;
+}
+
+/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless
+/// comparisons against an induction variable.
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+ unsigned IVOperIdx = 0;
+ ICmpInst::Predicate Pred = ICmp->getPredicate();
+ if (IVOperand != ICmp->getOperand(0)) {
+ // Swapped
+ assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
+ IVOperIdx = 1;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Get the SCEVs for the ICmp operands.
+ const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
+ const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
+
+ // Simplify unnecessary loops away.
+ const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // If the condition is always true or always false, replace it with
+ // a constant value.
+ if (SE->isKnownPredicate(Pred, S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
+ else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
+ ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
+ else
+ return;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
+ ++NumElimCmp;
+ Changed = true;
+ DeadInsts.push_back(ICmp);
+}
+
+/// eliminateIVRemainder - SimplifyIVUsers helper: replace an srem/urem whose
+/// numerator is IVOperand by the numerator itself or by a compare+select.
+void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
+ Value *IVOperand,
+ bool IsSigned) {
+ // We're only interested in the case where we know something about
+ // the numerator, i.e. IVOperand must be operand 0 of Rem.
+ if (IVOperand != Rem->getOperand(0))
+ return;
+
+ // Get the SCEVs for the Rem operands: S is the numerator, X the denominator.
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0));
+ const SCEV *X = SE->getSCEV(Rem->getOperand(1));
+
+ // Re-evaluate both SCEVs at the scope of the loop that contains Rem.
+ const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); // Rem's loop, despite the name
+ S = SE->getSCEVAtScope(S, ICmpLoop);
+ X = SE->getSCEVAtScope(X, ICmpLoop);
+
+ // i % n --> i if i is in [0,n). Signed rem also needs i known non-negative.
+ if ((!IsSigned || SE->isKnownNonNegative(S)) &&
+ SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ S, X))
+ Rem->replaceAllUsesWith(Rem->getOperand(0));
+ else {
+ // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). LessOne is S - 1.
+ const SCEV *LessOne =
+ SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
+ if (IsSigned && !SE->isKnownNonNegative(LessOne))
+ return; // signed case requires S - 1 to be provably non-negative
+
+ if (!SE->isKnownPredicate(IsSigned ?
+ ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ LessOne, X))
+ return; // cannot prove S - 1 < X: leave Rem as is
+
+ ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
+ Rem->getOperand(0), Rem->getOperand(1)); // presumably inserted before Rem - confirm
+ SelectInst *Sel =
+ SelectInst::Create(ICmp,
+ ConstantInt::get(Rem->getType(), 0),
+ Rem->getOperand(0), "tmp", Rem);
+ Rem->replaceAllUsesWith(Sel);
+ }
+
+ // Inform IVUsers (when IU is non-null) about the numerator's new users.
+ if (IU) {
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I);
+ }
+ DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
+ ++NumElimRem;
+ Changed = true;
+ DeadInsts.push_back(Rem); // only queued here; this function does not erase it
+}
+
+/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values. Returns false only if
+/// UseInst is not an icmp, srem/urem or identity of IVOperand (SCEVable operand).
+bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ eliminateIVComparison(ICmp, IVOperand);
+ return true; // true even when the helper left ICmp unchanged
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ eliminateIVRemainder(Rem, IVOperand, IsSigned);
+ return true; // true even when the helper left Rem unchanged
+ }
+ }
+
+ // Identity case: UseInst has IVOperand's type and the very same SCEV.
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.push_back(UseInst); // only queued here; this function does not erase it
+ return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+/// Each entry pushed onto SimpleIVUsers is a (User, Def) pair.
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI); // cast<> asserts every user is an Instruction
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users: push only if the insert succeeds.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (User != Def && Simplified.insert(User))
+ SimpleIVUsers.push_back(std::make_pair(User, Def));
+ }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV: an add-recurrence whose loop is L.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+/// Returns false when I's type is not SCEVable.
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // Only consider add-recurrences on L. NOTE(review): no isAffine() check here.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// simplifyUsers - Iteratively perform simplification on a worklist of users
+/// of the specified induction variable. Each successive simplification may push
+/// more users which may themselves be candidates for simplification.
+///
+/// This algorithm does not require IVUsers analysis. Instead, it simplifies
+/// instructions in-place during analysis. Rather than rewriting induction
+/// variables bottom-up from their users, it transforms a chain of IVUsers
+/// top-down, updating the IR only when it encounters a clear optimization
+/// opportunity.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+/// If V is non-null, cast users that were not eliminated go to V->visitCast().
+void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
+ if (!SE->isSCEVable(CurrIV->getType()))
+ return; // nothing to analyze for a non-SCEVable phi
+
+ // Instructions processed by SimplifyIndvar for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+ // Use-def pairs of IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ std::pair<Instruction*, Instruction*> UseOper =
+ SimpleIVUsers.pop_back_val(); // first = user, second = IV operand (def)
+ // Bypass back edges (the user is CurrIV itself) to avoid extra work.
+ if (UseOper.first == CurrIV) continue;
+
+ Instruction *IVOperand = UseOper.second;
+ for (unsigned N = 0; IVOperand; ++N) {
+ assert(N <= Simplified.size() && "runaway iteration");
+
+ Value *NewOper = foldIVUser(UseOper.first, IVOperand);
+ if (!NewOper)
+ break; // done folding
+ IVOperand = dyn_cast<Instruction>(NewOper); // null if not an Instruction: loop ends
+ }
+ if (!IVOperand)
+ continue; // folding left a non-instruction operand: skip this user
+
+ if (eliminateIVUser(UseOper.first, IVOperand)) {
+ pushIVUsers(IVOperand, Simplified, SimpleIVUsers);
+ continue;
+ }
+ CastInst *Cast = dyn_cast<CastInst>(UseOper.first);
+ if (V && Cast) {
+ V->visitCast(Cast); // casts are handed to the visitor, not followed further
+ continue;
+ }
+ if (isSimpleIVUser(UseOper.first, L, SE)) {
+ pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); // follow the chain of users
+ }
+ }
+}
+
+namespace llvm {
+
+/// simplifyUsersOfIV - Simplify instructions that use this induction variable
+/// by using ScalarEvolution to analyze the IV's recurrence. Returns hasChanged().
+bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
+{
+ LoopInfo *LI = &LPM->getAnalysis<LoopInfo>();
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead); // loop of CurrIV's block
+ SIV.simplifyUsers(CurrIV, V);
+ return SIV.hasChanged();
+}
+
+/// simplifyLoopIVs - Simplify users of induction variables within this
+/// loop (each leading PHI of L's header). Does not actually change or add IVs.
+bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead) {
+ bool Changed = false;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, LPM, Dead); // V omitted; presumably defaults to null - confirm
+ }
+ return Changed;
+}
+
+/// simplifyIVUsers - Perform simplification on instructions recorded by the
+/// IVUsers pass.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// simplifyLoopIVs.
+bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM,
+ SmallVectorImpl<WeakVH> &Dead) {
+ SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead);
+
+ // Walk the IVUsers list once. Each recorded user that is an icmp is handed
+ // to eliminateIVComparison, and each srem/urem to eliminateIVRemainder; any
+ // other kind of user is skipped. NOTE(review): IU->end() is re-read on each
+ // iteration; eliminateIVRemainder may add users to IU - confirm this is why.
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+ Instruction *UseInst = I->getUser();
+ Value *IVOperand = I->getOperandValToReplace();
+
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ SIV.eliminateIVComparison(ICmp, IVOperand);
+ continue;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned);
+ continue;
+ }
+ }
+ }
+ return SIV.hasChanged();
+}
+
+} // namespace llvm
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 973b105a1c..fc2538db64 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -183,10 +183,9 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
}
}
- // Remap attached metadata. Don't bother remapping DebugLoc, it can never
- // have mappings to do.
+ // Remap attached metadata.
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
- I->getAllMetadataOtherThanDebugLoc(MDs);
+ I->getAllMetadata(MDs);
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
MDNode *Old = MI->second;