aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Transforms/IPO/InlinerPass.h4
-rw-r--r--lib/Transforms/IPO/Inliner.cpp150
-rw-r--r--test/Transforms/Inline/array_merge.ll26
3 files changed, 151 insertions, 29 deletions
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index 36c7445c79..bf6299964e 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -79,10 +79,6 @@ private:
/// shouldInline - Return true if the inliner should attempt to
/// inline at the given CallSite.
bool shouldInline(CallSite CS);
-
- bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
- const SmallPtrSet<Function*, 8> &SCCFunctions,
- const TargetData *TD);
};
} // End llvm namespace
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 965849cc4f..1468155cda 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -33,6 +33,7 @@ using namespace llvm;
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+STATISTIC(NumMergedAllocas, "Number of allocas merged together");
static cl::opt<int>
InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore,
@@ -51,15 +52,25 @@ void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
CallGraphSCCPass::getAnalysisUsage(Info);
}
-// InlineCallIfPossible - If it is possible to inline the specified call site,
-// do so and update the CallGraph for this operation.
-bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
- const SmallPtrSet<Function*, 8> &SCCFunctions,
- const TargetData *TD) {
+
+typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
+InlinedArrayAllocasTy;
+
+/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR. The
+/// InlinedArrayAllocas map keeps track of any
+static bool InlineCallIfPossible(CallSite CS, CallGraph &CG,
+ const TargetData *TD,
+ InlinedArrayAllocasTy &InlinedArrayAllocas) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
- if (!InlineFunction(CS, &CG, TD))
+ // Try to inline the function. Get the list of static allocas that were
+ // inlined.
+ SmallVector<AllocaInst*, 16> StaticAllocas;
+ if (!InlineFunction(CS, &CG, TD, &StaticAllocas))
return false;
// If the inlined function had a higher stack protection level than the
@@ -70,24 +81,89 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,
!Caller->hasFnAttr(Attribute::StackProtectReq))
Caller->addFnAttr(Attribute::StackProtect);
- // If we inlined the last possible call site to the function, delete the
- // function body now.
- if (Callee->use_empty() &&
- (Callee->hasLocalLinkage() || Callee->hasAvailableExternallyLinkage()) &&
- !SCCFunctions.count(Callee)) {
- DEBUG(errs() << " -> Deleting dead function: "
- << Callee->getName() << "\n");
- CallGraphNode *CalleeNode = CG[Callee];
-
- // Remove any call graph edges from the callee to its callees.
- CalleeNode->removeAllCalledFunctions();
+
+ // Look at all of the allocas that we inlined through this call site. If we
+ // have already inlined other allocas through other calls into this function,
+ // then we know that they have disjoint lifetimes and that we can merge them.
+ //
+ // There are many heuristics possible for merging these allocas, and the
+ // different options have different tradeoffs. One thing that we *really*
+ // don't want to hurt is SRoA: once inlining happens, often allocas are no
+ // longer address taken and so they can be promoted.
+ //
+ // Our "solution" for that is to only merge allocas whose outermost type is an
+ // array type. These are usually not promoted because someone is using a
+ // variable index into them. These are also often the most important ones to
+ // merge.
+ //
+ // A better solution would be to have real memory lifetime markers in the IR
+ // and not have the inliner do any merging of allocas at all. This would
+ // allow the backend to do proper stack slot coloring of all allocas that
+ // *actually make it to the backend*, which is really what we want.
+ //
+ // Because we don't have this information, we do this simple and useful hack.
+ //
+ SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+
+ // Loop over all the allocas we have so far and see if they can be merged with
+ // a previously inlined alloca. If not, remember that we had it.
+ for (unsigned AllocaNo = 0, e = StaticAllocas.size();
+ AllocaNo != e; ++AllocaNo) {
+ AllocaInst *AI = StaticAllocas[AllocaNo];
+
+ // Don't bother trying to merge array allocations (they will usually be
+ // canonicalized to be an allocation *of* an array), or allocations whose
+ // type is not itself an array (because we're afraid of pessimizing SRoA).
+ const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ if (ATy == 0 || AI->isArrayAllocation())
+ continue;
+
+ // Get the list of all available allocas for this array type.
+ std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
+
+ // Loop over the allocas in AllocasForType to see if we can reuse one. Note
+ // that we have to be careful not to reuse the same "available" alloca for
+ // multiple different allocas that we just inlined, we use the 'UsedAllocas'
+ // set to keep track of which "available" allocas are being used by this
+ // function. Also, AllocasForType can be empty of course!
+ bool MergedAwayAlloca = false;
+ for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
+ AllocaInst *AvailableAlloca = AllocasForType[i];
+
+ // The available alloca has to be in the right function, not in some other
+ // function in this SCC.
+ if (AvailableAlloca->getParent() != AI->getParent())
+ continue;
+
+ // If the inlined function already uses this alloca then we can't reuse
+ // it.
+ if (!UsedAllocas.insert(AvailableAlloca))
+ continue;
+
+ // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
+ // success!
+ DEBUG(errs() << " ***MERGED ALLOCA: " << *AI);
+
+ AI->replaceAllUsesWith(AvailableAlloca);
+ AI->eraseFromParent();
+ MergedAwayAlloca = true;
+ ++NumMergedAllocas;
+ break;
+ }
- resetCachedCostInfo(CalleeNode->getFunction());
+ // If we already nuked the alloca, we're done with it.
+ if (MergedAwayAlloca)
+ continue;
- // Removing the node for callee from the call graph and delete it.
- delete CG.removeFunctionFromModule(CalleeNode);
- ++NumDeleted;
+ // If we were unable to merge away the alloca either because there are no
+ // allocas of the right type available or because we reused them all
+ // already, remember that this alloca came from an inlined function and mark
+ // it used so we don't reuse it for other allocas from this inline
+ // operation.
+ AllocasForType.push_back(AI);
+ UsedAllocas.insert(AI);
}
+
return true;
}
@@ -143,7 +219,7 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
// Scan through and identify all call sites ahead of time so that we only
// inline call sites in the original functions, not call sites that result
// from inlining other functions.
- std::vector<CallSite> CallSites;
+ SmallVector<CallSite, 16> CallSites;
for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
Function *F = SCC[i]->getFunction();
@@ -171,6 +247,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
if (SCCFunctions.count(F))
std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
+
+ InlinedArrayAllocasTy InlinedArrayAllocas;
+
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
bool Changed = false;
@@ -181,7 +260,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
// calls to become direct calls.
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
// We can only inline direct calls.
- Function *Callee = CallSites[CSi].getCalledFunction();
+ CallSite CS = CallSites[CSi];
+
+ Function *Callee = CS.getCalledFunction();
if (!Callee) continue;
// Calls to external functions are never inlinable.
@@ -199,15 +280,34 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
// If the policy determines that we should inline this function,
// try to do so.
- CallSite CS = CallSites[CSi];
if (!shouldInline(CS))
continue;
Function *Caller = CS.getCaller();
// Attempt to inline the function...
- if (!InlineCallIfPossible(CS, CG, SCCFunctions, TD))
+ if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
continue;
+ // If we inlined the last possible call site to the function, delete the
+ // function body now.
+ if (Callee->use_empty() &&
+ (Callee->hasLocalLinkage() ||
+ Callee->hasAvailableExternallyLinkage()) &&
+ !SCCFunctions.count(Callee)) {
+ DEBUG(errs() << " -> Deleting dead function: "
+ << Callee->getName() << "\n");
+ CallGraphNode *CalleeNode = CG[Callee];
+
+ // Remove any call graph edges from the callee to its callees.
+ CalleeNode->removeAllCalledFunctions();
+
+ resetCachedCostInfo(Callee);
+
+ // Removing the node for callee from the call graph and delete it.
+ delete CG.removeFunctionFromModule(CalleeNode);
+ ++NumDeleted;
+ }
+
// Remove any cached cost info for this caller, as inlining the
// callee has increased the size of the caller (which may be the
// same as the callee).
diff --git a/test/Transforms/Inline/array_merge.ll b/test/Transforms/Inline/array_merge.ll
new file mode 100644
index 0000000000..f4f53ca1ce
--- /dev/null
+++ b/test/Transforms/Inline/array_merge.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | opt -inline | llvm-dis | FileCheck %s
+; rdar://7173846
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+define internal void @foo() nounwind ssp {
+entry:
+ %A = alloca [100 x i32]
+ %B = alloca [100 x i32]
+ call void @bar([100 x i32]* %A, [100 x i32]* %B) nounwind
+ ret void
+}
+
+declare void @bar([100 x i32]*, [100 x i32]*)
+
+define void @test() nounwind ssp {
+entry:
+; CHECK: @test()
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %A.i = alloca
+; CHECK-NEXT: %B.i = alloca
+; CHECK-NEXT: call void
+ call void @foo() nounwind
+ call void @foo() nounwind
+ ret void
+}