diff options
author | Dan Gohman <djg@cray.com> | 2007-07-18 16:29:46 +0000 |
---|---|---|
committer | Dan Gohman <djg@cray.com> | 2007-07-18 16:29:46 +0000 |
commit | f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc (patch) | |
tree | ebb79ea1ee5e3bc1fdf38541a811a8b804f0679a /lib/Transforms/Utils | |
download | external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.gz external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.tar.bz2 external_llvm-f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cc.zip |
It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Utils')
22 files changed, 8023 insertions, 0 deletions
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp new file mode 100644 index 0000000000..520cfeb58b --- /dev/null +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -0,0 +1,175 @@ +//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on basic blocks, and +// instructions contained within basic blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Constant.h" +#include "llvm/Type.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Dominators.h" +#include <algorithm> +using namespace llvm; + +/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI) +/// with a value, then remove and delete the original instruction. +/// +void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL, + BasicBlock::iterator &BI, Value *V) { + Instruction &I = *BI; + // Replaces all of the uses of the instruction with uses of the value + I.replaceAllUsesWith(V); + + // Make sure to propagate a name if there is one already. + if (I.hasName() && !V->hasName()) + V->takeName(&I); + + // Delete the unnecessary instruction now... + BI = BIL.erase(BI); +} + + +/// ReplaceInstWithInst - Replace the instruction specified by BI with the +/// instruction specified by I. The original instruction is deleted and BI is +/// updated to point to the new instruction. +/// +void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, + BasicBlock::iterator &BI, Instruction *I) { + assert(I->getParent() == 0 && + "ReplaceInstWithInst: Instruction already inserted into basic block!"); + + // Insert the new instruction into the basic block... + BasicBlock::iterator New = BIL.insert(BI, I); + + // Replace all uses of the old instruction, and delete it. + ReplaceInstWithValue(BIL, BI, I); + + // Move BI back to point to the newly inserted instruction + BI = New; +} + +/// ReplaceInstWithInst - Replace the instruction specified by From with the +/// instruction specified by To. +/// +void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { + BasicBlock::iterator BI(From); + ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); +} + +/// RemoveSuccessor - Change the specified terminator instruction such that its +/// successor SuccNum no longer exists. Because this reduces the outgoing +/// degree of the current basic block, the actual terminator instruction itself +/// may have to be changed. In the case where the last successor of the block +/// is deleted, a return instruction is inserted in its place which can cause a +/// surprising change in program behavior if it is not expected. +/// +void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { + assert(SuccNum < TI->getNumSuccessors() && + "Trying to remove a nonexistant successor!"); + + // If our old successor block contains any PHI nodes, remove the entry in the + // PHI nodes that comes from this branch... + // + BasicBlock *BB = TI->getParent(); + TI->getSuccessor(SuccNum)->removePredecessor(BB); + + TerminatorInst *NewTI = 0; + switch (TI->getOpcode()) { + case Instruction::Br: + // If this is a conditional branch... convert to unconditional branch. + if (TI->getNumSuccessors() == 2) { + cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum)); + } else { // Otherwise convert to a return instruction... + Value *RetVal = 0; + + // Create a value to return... if the function doesn't return null... + if (BB->getParent()->getReturnType() != Type::VoidTy) + RetVal = Constant::getNullValue(BB->getParent()->getReturnType()); + + // Create the return... + NewTI = new ReturnInst(RetVal); + } + break; + + case Instruction::Invoke: // Should convert to call + case Instruction::Switch: // Should remove entry + default: + case Instruction::Ret: // Cannot happen, has no successors! + assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!"); + abort(); + } + + if (NewTI) // If it's a different instruction, replace. + ReplaceInstWithInst(TI, NewTI); +} + +/// SplitEdge - Split the edge connecting specified block. Pass P must +/// not be NULL. +BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { + TerminatorInst *LatchTerm = BB->getTerminator(); + unsigned SuccNum = 0; + for (unsigned i = 0, e = LatchTerm->getNumSuccessors(); ; ++i) { + assert(i != e && "Didn't find edge?"); + if (LatchTerm->getSuccessor(i) == Succ) { + SuccNum = i; + break; + } + } + + // If this is a critical edge, let SplitCriticalEdge do it. + if (SplitCriticalEdge(BB->getTerminator(), SuccNum, P)) + return LatchTerm->getSuccessor(SuccNum); + + // If the edge isn't critical, then BB has a single successor or Succ has a + // single pred. Split the block. + BasicBlock::iterator SplitPoint; + if (BasicBlock *SP = Succ->getSinglePredecessor()) { + // If the successor only has a single pred, split the top of the successor + // block. + assert(SP == BB && "CFG broken"); + return SplitBlock(Succ, Succ->begin(), P); + } else { + // Otherwise, if BB has a single successor, split it at the bottom of the + // block. + assert(BB->getTerminator()->getNumSuccessors() == 1 && + "Should have a single succ!"); + return SplitBlock(BB, BB->getTerminator(), P); + } +} + +/// SplitBlock - Split the specified block at the specified instruction - every +/// thing before SplitPt stays in Old and everything starting with SplitPt moves +/// to a new block. The two blocks are joined by an unconditional branch and +/// the loop info is updated. +/// +BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { + + LoopInfo &LI = P->getAnalysis<LoopInfo>(); + BasicBlock::iterator SplitIt = SplitPt; + while (isa<PHINode>(SplitIt)) + ++SplitIt; + BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); + + // The new block lives in whichever loop the old one did. + if (Loop *L = LI.getLoopFor(Old)) + L->addBasicBlockToLoop(New, LI); + + if (DominatorTree *DT = P->getAnalysisToUpdate<DominatorTree>()) + DT->addNewBlock(New, Old); + + if (DominanceFrontier *DF = P->getAnalysisToUpdate<DominanceFrontier>()) + DF->splitBlock(Old); + + return New; +} diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp new file mode 100644 index 0000000000..af9a114bbe --- /dev/null +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -0,0 +1,269 @@ +//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// BreakCriticalEdges pass - Break all of the critical edges in the CFG by +// inserting a dummy basic block. This pass may be "required" by passes that +// cannot deal with critical edges. For this usage, the structure type is +// forward declared. This pass obviously invalidates the CFG, but can update +// forward dominator (set, immediate dominators, tree, and frontier) +// information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "break-crit-edges" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumBroken, "Number of blocks inserted"); + +namespace { + struct VISIBILITY_HIDDEN BreakCriticalEdges : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BreakCriticalEdges() : FunctionPass((intptr_t)&ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + AU.addPreserved<DominanceFrontier>(); + AU.addPreserved<LoopInfo>(); + + // No loop canonicalization guarantees are broken by this pass. + AU.addPreservedID(LoopSimplifyID); + } + }; + + char BreakCriticalEdges::ID = 0; + RegisterPass<BreakCriticalEdges> X("break-crit-edges", + "Break critical edges in CFG"); +} + +// Publically exposed interface to pass... +const PassInfo *llvm::BreakCriticalEdgesID = X.getPassInfo(); +FunctionPass *llvm::createBreakCriticalEdgesPass() { + return new BreakCriticalEdges(); +} + +// runOnFunction - Loop over all of the edges in the CFG, breaking critical +// edges as they are found. +// +bool BreakCriticalEdges::runOnFunction(Function &F) { + bool Changed = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + TerminatorInst *TI = I->getTerminator(); + if (TI->getNumSuccessors() > 1) + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (SplitCriticalEdge(TI, i, this)) { + ++NumBroken; + Changed = true; + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Implementation of the external critical edge manipulation functions +//===----------------------------------------------------------------------===// + +// isCriticalEdge - Return true if the specified edge is a critical edge. +// Critical edges are edges from a block with multiple successors to a block +// with multiple predecessors. +// +bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, + bool AllowIdenticalEdges) { + assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + if (TI->getNumSuccessors() == 1) return false; + + const BasicBlock *Dest = TI->getSuccessor(SuccNum); + pred_const_iterator I = pred_begin(Dest), E = pred_end(Dest); + + // If there is more than one predecessor, this is a critical edge... + assert(I != E && "No preds, but we have an edge to the block?"); + const BasicBlock *FirstPred = *I; + ++I; // Skip one edge due to the incoming arc from TI. + if (!AllowIdenticalEdges) + return I != E; + + // If AllowIdenticalEdges is true, then we allow this edge to be considered + // non-critical iff all preds come from TI's block. + for (; I != E; ++I) + if (*I != FirstPred) return true; + return false; +} + +// SplitCriticalEdge - If this edge is a critical edge, insert a new node to +// split the critical edge. This will update DominatorTree, and DominatorFrontier +// information if it is available, thus calling this pass will not invalidate +// any of them. This returns true if the edge was split, false otherwise. +// This ensures that all edges to that dest go to one block instead of each +// going to a different block. +// +bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, + bool MergeIdenticalEdges) { + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false; + BasicBlock *TIBB = TI->getParent(); + BasicBlock *DestBB = TI->getSuccessor(SuccNum); + + // Create a new basic block, linking it into the CFG. + BasicBlock *NewBB = new BasicBlock(TIBB->getName() + "." + + DestBB->getName() + "_crit_edge"); + // Create our unconditional branch... + new BranchInst(DestBB, NewBB); + + // Branch to the new block, breaking the edge. + TI->setSuccessor(SuccNum, NewBB); + + // Insert the block into the function... right after the block TI lives in. + Function &F = *TIBB->getParent(); + Function::iterator FBBI = TIBB; + F.getBasicBlockList().insert(++FBBI, NewBB); + + // If there are any PHI nodes in DestBB, we need to update them so that they + // merge incoming values from NewBB instead of from TIBB. + // + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + // We no longer enter through TIBB, now we come in through NewBB. Revector + // exactly one entry in the PHI node that used to come from TIBB to come + // from NewBB. + int BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + + // If there are any other edges from TIBB to DestBB, update those to go + // through the split block, making those edges non-critical as well (and + // reducing the number of phi entries in the DestBB if relevant). + if (MergeIdenticalEdges) { + for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { + if (TI->getSuccessor(i) != DestBB) continue; + + // Remove an entry for TIBB from DestBB phi nodes. + DestBB->removePredecessor(TIBB); + + // We found another edge to DestBB, go to NewBB instead. + TI->setSuccessor(i, NewBB); + } + } + + + + // If we don't have a pass object, we can't update anything... + if (P == 0) return true; + + // Now update analysis information. Since the only predecessor of NewBB is + // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate + // anything, as there are other successors of DestBB. However, if all other + // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a + // loop header) then NewBB dominates DestBB. + SmallVector<BasicBlock*, 8> OtherPreds; + + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) + if (*I != NewBB) + OtherPreds.push_back(*I); + + bool NewBBDominatesDestBB = true; + + // Should we update DominatorTree information? + if (DominatorTree *DT = P->getAnalysisToUpdate<DominatorTree>()) { + DomTreeNode *TINode = DT->getNode(TIBB); + + // The new block is not the immediate dominator for any other nodes, but + // TINode is the immediate dominator for the new node. + // + if (TINode) { // Don't break unreachable code! + DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); + DomTreeNode *DestBBNode = 0; + + // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. + if (!OtherPreds.empty()) { + DestBBNode = DT->getNode(DestBB); + while (!OtherPreds.empty() && NewBBDominatesDestBB) { + if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back())) + NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode); + OtherPreds.pop_back(); + } + OtherPreds.clear(); + } + + // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it + // doesn't dominate anything. + if (NewBBDominatesDestBB) { + if (!DestBBNode) DestBBNode = DT->getNode(DestBB); + DT->changeImmediateDominator(DestBBNode, NewBBNode); + } + } + } + + // Should we update DominanceFrontier information? + if (DominanceFrontier *DF = P->getAnalysisToUpdate<DominanceFrontier>()) { + // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. + if (!OtherPreds.empty()) { + // FIXME: IMPLEMENT THIS! + assert(0 && "Requiring domfrontiers but not idom/domtree/domset." + " not implemented yet!"); + } + + // Since the new block is dominated by its only predecessor TIBB, + // it cannot be in any block's dominance frontier. If NewBB dominates + // DestBB, its dominance frontier is the same as DestBB's, otherwise it is + // just {DestBB}. + DominanceFrontier::DomSetType NewDFSet; + if (NewBBDominatesDestBB) { + DominanceFrontier::iterator I = DF->find(DestBB); + if (I != DF->end()) + DF->addBasicBlock(NewBB, I->second); + else + DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType()); + } else { + DominanceFrontier::DomSetType NewDFSet; + NewDFSet.insert(DestBB); + DF->addBasicBlock(NewBB, NewDFSet); + } + } + + // Update LoopInfo if it is around. + if (LoopInfo *LI = P->getAnalysisToUpdate<LoopInfo>()) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. + if (Loop *TIL = LI->getLoopFor(TIBB)) + if (Loop *DestLoop = LI->getLoopFor(DestBB)) { + if (TIL == DestLoop) { + // Both in the same loop, the NewBB joins loop. + DestLoop->addBasicBlockToLoop(NewBB, *LI); + } else if (TIL->contains(DestLoop->getHeader())) { + // Edge from an outer loop to an inner loop. Add to the outer loop. + TIL->addBasicBlockToLoop(NewBB, *LI); + } else if (DestLoop->contains(TIL->getHeader())) { + // Edge from an inner loop to an outer loop. Add to the outer loop. + DestLoop->addBasicBlockToLoop(NewBB, *LI); + } else { + // Edge from two loops with no containment relation. Because these + // are natural loops, we know that the destination block must be the + // header of its loop (adding a branch into a loop elsewhere would + // create an irreducible loop). + assert(DestLoop->getHeader() == DestBB && + "Should not create irreducible loops!"); + if (Loop *P = DestLoop->getParentLoop()) + P->addBasicBlockToLoop(NewBB, *LI); + } + } + } + return true; +} diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp new file mode 100644 index 0000000000..cff58ab154 --- /dev/null +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -0,0 +1,485 @@ +//===- CloneFunction.cpp - Clone a function into another function ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneFunctionInto interface, which is used as the +// low-level function cloner. This is used by the CloneFunction and function +// inliner to do the dirty work of copying the body of a function around. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "ValueMapper.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/ADT/SmallVector.h" +#include <map> +using namespace llvm; + +// CloneBasicBlock - See comments in Cloning.h +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, + DenseMap<const Value*, Value*> &ValueMap, + const char *NameSuffix, Function *F, + ClonedCodeInfo *CodeInfo) { + BasicBlock *NewBB = new BasicBlock("", F); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over. + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + ValueMap[II] = NewInst; // Add instruction map to value. + + hasCalls |= isa<CallInst>(II); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator()); + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->getEntryBlock(); + } + return NewBB; +} + +// Clone OldFunc into NewFunc, transforming the old arguments into references to +// ArgMap values. +// +void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, + DenseMap<const Value*, Value*> &ValueMap, + std::vector<ReturnInst*> &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); I != E; ++I) + assert(ValueMap.count(I) && "No mapping from source argument specified!"); +#endif + + // Loop over all of the basic blocks in the function, cloning them as + // appropriate. Note that we save BE this way in order to handle cloning of + // recursive functions into themselves. + // + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc, + CodeInfo); + ValueMap[&BB] = CBB; // Add basic block mapping. + + if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) + Returns.push_back(RI); + } + + // Loop over all of the instructions in the function, fixing up operand + // references as we go. This uses ValueMap to do all the hard work. + // + for (Function::iterator BB = cast<BasicBlock>(ValueMap[OldFunc->begin()]), + BE = NewFunc->end(); BB != BE; ++BB) + // Loop over all instructions, fixing each one as we find it... + for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) + RemapInstruction(II, ValueMap); +} + +/// CloneFunction - Return a copy of the specified function, but without +/// embedding the function into another module. Also, any references specified +/// in the ValueMap are changed to refer to their mapped value instead of the +/// original one. If any of the arguments to the function are in the ValueMap, +/// the arguments are deleted from the resultant function. The ValueMap is +/// updated to include mappings from all of the instructions and basicblocks in +/// the function from their old to new values. +/// +Function *llvm::CloneFunction(const Function *F, + DenseMap<const Value*, Value*> &ValueMap, + ClonedCodeInfo *CodeInfo) { + std::vector<const Type*> ArgTypes; + + // The user might be deleting arguments to the function by specifying them in + // the ValueMap. If so, we need to not add the arguments to the arg ty vector + // + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (ValueMap.count(I) == 0) // Haven't mapped the argument to anything yet? + ArgTypes.push_back(I->getType()); + + // Create a new function type... + FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), + ArgTypes, F->getFunctionType()->isVarArg()); + + // Create the new function... + Function *NewF = new Function(FTy, F->getLinkage(), F->getName()); + + // Loop over the arguments, copying the names of the mapped arguments over... + Function::arg_iterator DestI = NewF->arg_begin(); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + if (ValueMap.count(I) == 0) { // Is this argument preserved? + DestI->setName(I->getName()); // Copy the name over... + ValueMap[I] = DestI++; // Add mapping to ValueMap + } + + std::vector<ReturnInst*> Returns; // Ignore returns cloned... + CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo); + return NewF; +} + + + +namespace { + /// PruningFunctionCloner - This class is a private class used to implement + /// the CloneAndPruneFunctionInto method. + struct VISIBILITY_HIDDEN PruningFunctionCloner { + Function *NewFunc; + const Function *OldFunc; + DenseMap<const Value*, Value*> &ValueMap; + std::vector<ReturnInst*> &Returns; + const char *NameSuffix; + ClonedCodeInfo *CodeInfo; + const TargetData *TD; + + public: + PruningFunctionCloner(Function *newFunc, const Function *oldFunc, + DenseMap<const Value*, Value*> &valueMap, + std::vector<ReturnInst*> &returns, + const char *nameSuffix, + ClonedCodeInfo *codeInfo, + const TargetData *td) + : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns), + NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + } + + /// CloneBlock - The specified block is found to be reachable, clone it and + /// anything that it can reach. + void CloneBlock(const BasicBlock *BB, + std::vector<const BasicBlock*> &ToClone); + + public: + /// ConstantFoldMappedInstruction - Constant fold the specified instruction, + /// mapping its operands through ValueMap if they are available. + Constant *ConstantFoldMappedInstruction(const Instruction *I); + }; +} + +/// CloneBlock - The specified block is found to be reachable, clone it and +/// anything that it can reach. +void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, + std::vector<const BasicBlock*> &ToClone){ + Value *&BBEntry = ValueMap[BB]; + + // Have we already cloned this block? + if (BBEntry) return; + + // Nope, clone it now. + BasicBlock *NewBB; + BBEntry = NewBB = new BasicBlock(); + if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; + + // Loop over all instructions, and copy them over, DCE'ing as we go. This + // loop doesn't include the terminator. + for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); + II != IE; ++II) { + // If this instruction constant folds, don't bother cloning the instruction, + // instead, just add the constant to the value map. + if (Constant *C = ConstantFoldMappedInstruction(II)) { + ValueMap[II] = C; + continue; + } + + Instruction *NewInst = II->clone(); + if (II->hasName()) + NewInst->setName(II->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + ValueMap[II] = NewInst; // Add instruction map to value. + + hasCalls |= isa<CallInst>(II); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (isa<ConstantInt>(AI->getArraySize())) + hasStaticAllocas = true; + else + hasDynamicAllocas = true; + } + } + + // Finally, clone over the terminator. + const TerminatorInst *OldTI = BB->getTerminator(); + bool TerminatorDone = false; + if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { + if (BI->isConditional()) { + // If the condition was a known constant in the callee... + ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); + // Or is a known constant in the caller... + if (Cond == 0) + Cond = dyn_cast_or_null<ConstantInt>(ValueMap[BI->getCondition()]); + + // Constant fold to uncond branch! + if (Cond) { + BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); + ValueMap[OldTI] = new BranchInst(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { + // If switching on a value known constant in the caller. + ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); + if (Cond == 0) // Or known constant after constant prop in the callee... + Cond = dyn_cast_or_null<ConstantInt>(ValueMap[SI->getCondition()]); + if (Cond) { // Constant fold to uncond branch! + BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond)); + ValueMap[OldTI] = new BranchInst(Dest, NewBB); + ToClone.push_back(Dest); + TerminatorDone = true; + } + } + + if (!TerminatorDone) { + Instruction *NewInst = OldTI->clone(); + if (OldTI->hasName()) + NewInst->setName(OldTI->getName()+NameSuffix); + NewBB->getInstList().push_back(NewInst); + ValueMap[OldTI] = NewInst; // Add instruction map to value. + + // Recursively clone any reachable successor blocks. + const TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + ToClone.push_back(TI->getSuccessor(i)); + } + + if (CodeInfo) { + CodeInfo->ContainsCalls |= hasCalls; + CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI); + CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && + BB != &BB->getParent()->front(); + } + + if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator())) + Returns.push_back(RI); +} + +/// ConstantFoldMappedInstruction - Constant fold the specified instruction, +/// mapping its operands through ValueMap if they are available. +Constant *PruningFunctionCloner:: +ConstantFoldMappedInstruction(const Instruction *I) { + SmallVector<Constant*, 8> Ops; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), + ValueMap))) + Ops.push_back(Op); + else + return 0; // All operands not constant! + + return ConstantFoldInstOperands(I, &Ops[0], Ops.size(), TD); +} + +/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, +/// except that it does some simple constant prop and DCE on the fly. The +/// effect of this is to copy significantly less code in cases where (for +/// example) a function call with constant arguments is inlined, and those +/// constant arguments cause a significant amount of code in the callee to be +/// dead. Since this doesn't produce an exactly copy of the input, it can't be +/// used for things like CloneFunction or CloneModule. +void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + DenseMap<const Value*, Value*> &ValueMap, + std::vector<ReturnInst*> &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo, + const TargetData *TD) { + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (Function::const_arg_iterator II = OldFunc->arg_begin(), + E = OldFunc->arg_end(); II != E; ++II) + assert(ValueMap.count(II) && "No mapping from source argument specified!"); +#endif + + PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns, + NameSuffix, CodeInfo, TD); + + // Clone the entry block, and anything recursively reachable from it. + std::vector<const BasicBlock*> CloneWorklist; + CloneWorklist.push_back(&OldFunc->getEntryBlock()); + while (!CloneWorklist.empty()) { + const BasicBlock *BB = CloneWorklist.back(); + CloneWorklist.pop_back(); + PFC.CloneBlock(BB, CloneWorklist); + } + + // Loop over all of the basic blocks in the old function. If the block was + // reachable, we have cloned it and the old block is now in the value map: + // insert it into the new function in the right order. If not, ignore it. + // + // Defer PHI resolution until rest of function is resolved. + std::vector<const PHINode*> PHIToResolve; + for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); + BI != BE; ++BI) { + BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]); + if (NewBB == 0) continue; // Dead block. + + // Add the new block to the new function. + NewFunc->getBasicBlockList().push_back(NewBB); + + // Loop over all of the instructions in the block, fixing up operand + // references as we go. This uses ValueMap to do all the hard work. + // + BasicBlock::iterator I = NewBB->begin(); + + // Handle PHI nodes specially, as we have to remove references to dead + // blocks. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + // Skip over all PHI nodes, remembering them for later. + BasicBlock::const_iterator OldI = BI->begin(); + for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) + PHIToResolve.push_back(cast<PHINode>(OldI)); + } + + // Otherwise, remap the rest of the instructions normally. + for (; I != NewBB->end(); ++I) + RemapInstruction(I, ValueMap); + } + + // Defer PHI resolution until rest of function is resolved, PHI resolution + // requires the CFG to be up-to-date. + for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { + const PHINode *OPN = PHIToResolve[phino]; + unsigned NumPreds = OPN->getNumIncomingValues(); + const BasicBlock *OldBB = OPN->getParent(); + BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]); + + // Map operands for blocks that are live and remove operands for blocks + // that are dead. + for (; phino != PHIToResolve.size() && + PHIToResolve[phino]->getParent() == OldBB; ++phino) { + OPN = PHIToResolve[phino]; + PHINode *PN = cast<PHINode>(ValueMap[OPN]); + for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { + if (BasicBlock *MappedBlock = + cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) { + Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap); + assert(InVal && "Unknown input value?"); + PN->setIncomingValue(pred, InVal); + PN->setIncomingBlock(pred, MappedBlock); + } else { + PN->removeIncomingValue(pred, false); + --pred, --e; // Revisit the next entry. + } + } + } + + // The loop above has removed PHI entries for those blocks that are dead + // and has updated others. However, if a block is live (i.e. copied over) + // but its terminator has been changed to not go to this block, then our + // phi nodes will have invalid entries. Update the PHI nodes in this + // case. + PHINode *PN = cast<PHINode>(NewBB->begin()); + NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); + if (NumPreds != PN->getNumIncomingValues()) { + assert(NumPreds < PN->getNumIncomingValues()); + // Count how many times each predecessor comes to this block. + std::map<BasicBlock*, unsigned> PredCount; + for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); + PI != E; ++PI) + --PredCount[*PI]; + + // Figure out how many entries to remove from each PHI. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + ++PredCount[PN->getIncomingBlock(i)]; + + // At this point, the excess predecessor entries are positive in the + // map. Loop over all of the PHIs and remove excess predecessor + // entries. + BasicBlock::iterator I = NewBB->begin(); + for (; (PN = dyn_cast<PHINode>(I)); ++I) { + for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), + E = PredCount.end(); PCI != E; ++PCI) { + BasicBlock *Pred = PCI->first; + for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) + PN->removeIncomingValue(Pred, false); + } + } + } + + // If the loops above have made these phi nodes have 0 or 1 operand, + // replace them with undef or the input value. We must do this for + // correctness, because 0-operand phis are not valid. + PN = cast<PHINode>(NewBB->begin()); + if (PN->getNumIncomingValues() == 0) { + BasicBlock::iterator I = NewBB->begin(); + BasicBlock::const_iterator OldI = OldBB->begin(); + while ((PN = dyn_cast<PHINode>(I++))) { + Value *NV = UndefValue::get(PN->getType()); + PN->replaceAllUsesWith(NV); + assert(ValueMap[OldI] == PN && "ValueMap mismatch"); + ValueMap[OldI] = NV; + PN->eraseFromParent(); + ++OldI; + } + } + // NOTE: We cannot eliminate single entry phi nodes here, because of + // ValueMap. Single entry phi nodes can have multiple ValueMap entries + // pointing at them. Thus, deleting one would require scanning the ValueMap + // to update any entries in it that would require that. This would be + // really slow. + } + + // Now that the inlined function body has been fully constructed, go through + // and zap unconditional fall-through branches. This happen all the time when + // specializing code: code specialization turns conditional branches into + // uncond branches, and this code folds them. + Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]); + while (I != NewFunc->end()) { + BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); + if (!BI || BI->isConditional()) { ++I; continue; } + + // Note that we can't eliminate uncond branches if the destination has + // single-entry PHI nodes. Eliminating the single-entry phi nodes would + // require scanning the ValueMap to update any entries that point to the phi + // node. + BasicBlock *Dest = BI->getSuccessor(0); + if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { + ++I; continue; + } + + // We know all single-entry PHI nodes in the inlined function have been + // removed, so we just need to splice the blocks. + BI->eraseFromParent(); + + // Move all the instructions in the succ to the pred. + I->getInstList().splice(I->end(), Dest->getInstList()); + + // Make all PHI nodes that referred to Dest now refer to I as their source. + Dest->replaceAllUsesWith(I); + + // Remove the dest block. + Dest->eraseFromParent(); + + // Do not increment I, iteratively merge all things this block branches to. + } +} diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp new file mode 100644 index 0000000000..d64d58f383 --- /dev/null +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -0,0 +1,124 @@ +//===- CloneModule.cpp - Clone an entire module ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneModule interface which makes a copy of an +// entire module. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/Constant.h" +#include "ValueMapper.h" +using namespace llvm; + +/// CloneModule - Return an exact copy of the specified module. This is not as +/// easy as it might seem because we have to worry about making copies of global +/// variables and functions, and making their (initializers and references, +/// respectively) refer to the right globals. +/// +Module *llvm::CloneModule(const Module *M) { + // Create the value map that maps things from the old module over to the new + // module. + DenseMap<const Value*, Value*> ValueMap; + return CloneModule(M, ValueMap); +} + +Module *llvm::CloneModule(const Module *M, + DenseMap<const Value*, Value*> &ValueMap) { + // First off, we need to create the new module... + Module *New = new Module(M->getModuleIdentifier()); + New->setDataLayout(M->getDataLayout()); + New->setTargetTriple(M->getTargetTriple()); + New->setModuleInlineAsm(M->getModuleInlineAsm()); + + // Copy all of the type symbol table entries over. + const TypeSymbolTable &TST = M->getTypeSymbolTable(); + for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); + TI != TE; ++TI) + New->addTypeName(TI->first, TI->second); + + // Copy all of the dependent libraries over. + for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I) + New->addLibrary(*I); + + // Loop over all of the global variables, making corresponding globals in the + // new module. Here we add them to the ValueMap and to the new Module. We + // don't worry about attributes or initializers, they will come later. + // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) + ValueMap[I] = new GlobalVariable(I->getType()->getElementType(), false, + GlobalValue::ExternalLinkage, 0, + I->getName(), New); + + // Loop over the functions in the module, making external functions as before + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + Function *NF = + new Function(cast<FunctionType>(I->getType()->getElementType()), + GlobalValue::ExternalLinkage, I->getName(), New); + NF->setCallingConv(I->getCallingConv()); + ValueMap[I]= NF; + } + + // Loop over the aliases in the module + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) + ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage, + I->getName(), NULL, New); + + // Now that all of the things that global variable initializer can refer to + // have been created, loop through and copy the global variable referrers + // over... We also set the attributes on the global now. + // + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]); + if (I->hasInitializer()) + GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(), + ValueMap))); + GV->setLinkage(I->getLinkage()); + GV->setThreadLocal(I->isThreadLocal()); + GV->setConstant(I->isConstant()); + } + + // Similarly, copy over function bodies now... + // + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + Function *F = cast<Function>(ValueMap[I]); + if (!I->isDeclaration()) { + Function::arg_iterator DestI = F->arg_begin(); + for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); + ++J) { + DestI->setName(J->getName()); + ValueMap[J] = DestI++; + } + + std::vector<ReturnInst*> Returns; // Ignore returns cloned... + CloneFunctionInto(F, I, ValueMap, Returns); + } + + F->setLinkage(I->getLinkage()); + } + + // And aliases + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) { + GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]); + GA->setLinkage(I->getLinkage()); + if (const Constant* C = I->getAliasee()) + GA->setAliasee(cast<Constant>(MapValue(C, ValueMap))); + } + + return New; +} + +// vim: sw=2 diff --git a/lib/Transforms/Utils/CloneTrace.cpp b/lib/Transforms/Utils/CloneTrace.cpp new file mode 100644 index 0000000000..97e57b2ca4 --- /dev/null +++ b/lib/Transforms/Utils/CloneTrace.cpp @@ -0,0 +1,120 @@ +//===- CloneTrace.cpp - Clone a trace -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CloneTrace interface, which is used when writing +// runtime optimizations. It takes a vector of basic blocks clones the basic +// blocks, removes internal phi nodes, adds it to the same function as the +// original (although there is no jump to it) and returns the new vector of +// basic blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Trace.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "ValueMapper.h" +using namespace llvm; + +//Clones the trace (a vector of basic blocks) +std::vector<BasicBlock *> +llvm::CloneTrace(const std::vector<BasicBlock*> &origTrace) { + std::vector<BasicBlock *> clonedTrace; + DenseMap<const Value*, Value*> ValueMap; + + //First, loop over all the Basic Blocks in the trace and copy + //them using CloneBasicBlock. Also fix the phi nodes during + //this loop. To fix the phi nodes, we delete incoming branches + //that are not in the trace. + for(std::vector<BasicBlock *>::const_iterator T = origTrace.begin(), + End = origTrace.end(); T != End; ++T) { + + //Clone Basic Block + BasicBlock *clonedBlock = + CloneBasicBlock(*T, ValueMap, ".tr", (*T)->getParent()); + + //Add it to our new trace + clonedTrace.push_back(clonedBlock); + + //Add this new mapping to our Value Map + ValueMap[*T] = clonedBlock; + + //Loop over the phi instructions and delete operands + //that are from blocks not in the trace + //only do this if we are NOT the first block + if(T != origTrace.begin()) { + for (BasicBlock::iterator I = clonedBlock->begin(); + isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + //get incoming value for the previous BB + Value *V = PN->getIncomingValueForBlock(*(T-1)); + assert(V && "No incoming value from a BasicBlock in our trace!"); + + //remap our phi node to point to incoming value + ValueMap[*&I] = V; + + //remove phi node + clonedBlock->getInstList().erase(PN); + } + } + } + + //Second loop to do the remapping + for(std::vector<BasicBlock *>::const_iterator BB = clonedTrace.begin(), + BE = clonedTrace.end(); BB != BE; ++BB) { + for(BasicBlock::iterator I = (*BB)->begin(); I != (*BB)->end(); ++I) { + + //Loop over all the operands of the instruction + for(unsigned op=0, E = I->getNumOperands(); op != E; ++op) { + const Value *Op = I->getOperand(op); + + //Get it out of the value map + Value *V = ValueMap[Op]; + + //If not in the value map, then its outside our trace so ignore + if(V != 0) + I->setOperand(op,V); + } + } + } + + //return new vector of basic blocks + return clonedTrace; +} + +/// CloneTraceInto - Clone T into NewFunc. Original<->clone mapping is +/// saved in ValueMap. +/// +void llvm::CloneTraceInto(Function *NewFunc, Trace &T, + DenseMap<const Value*, Value*> &ValueMap, + const char *NameSuffix) { + assert(NameSuffix && "NameSuffix cannot be null!"); + + // Loop over all of the basic blocks in the trace, cloning them as + // appropriate. + // + for (Trace::const_iterator BI = T.begin(), BE = T.end(); BI != BE; ++BI) { + const BasicBlock *BB = *BI; + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = CloneBasicBlock(BB, ValueMap, NameSuffix, NewFunc); + ValueMap[BB] = CBB; // Add basic block mapping. + } + + // Loop over all of the instructions in the new function, fixing up operand + // references as we go. This uses ValueMap to do all the hard work. + // + for (Function::iterator BB = + cast<BasicBlock>(ValueMap[T.getEntryBasicBlock()]), + BE = NewFunc->end(); BB != BE; ++BB) + // Loop over all instructions, fixing each one as we find it... + for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) + RemapInstruction(II, ValueMap); +} + diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp new file mode 100644 index 0000000000..aaf99866b1 --- /dev/null +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -0,0 +1,737 @@ +//===- CodeExtractor.cpp - Pull code region into a new function -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface to tear out a code region, such as an +// individual loop or a parallel section, into a new function, replacing it with +// a call to the new function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <set> +using namespace llvm; + +// Provide a command-line option to aggregate function arguments into a struct +// for functions produced by the code extrator. This is useful when converting +// extracted functions to pthread-based code, as only one argument (void*) can +// be passed in to pthread_create(). +static cl::opt<bool> +AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, + cl::desc("Aggregate arguments to code-extracted functions")); + +namespace { + class VISIBILITY_HIDDEN CodeExtractor { + typedef std::vector<Value*> Values; + std::set<BasicBlock*> BlocksToExtract; + DominatorTree* DT; + bool AggregateArgs; + unsigned NumExitBlocks; + const Type *RetTy; + public: + CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false) + : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {} + + Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code); + + bool isEligible(const std::vector<BasicBlock*> &code); + + private: + /// definedInRegion - Return true if the specified value is defined in the + /// extracted region. + bool definedInRegion(Value *V) const { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (BlocksToExtract.count(I->getParent())) + return true; + return false; + } + + /// definedInCaller - Return true if the specified value is defined in the + /// function being code extracted, but not in the region being extracted. + /// These values must be passed in as live-ins to the function. + bool definedInCaller(Value *V) const { + if (isa<Argument>(V)) return true; + if (Instruction *I = dyn_cast<Instruction>(V)) + if (!BlocksToExtract.count(I->getParent())) + return true; + return false; + } + + void severSplitPHINodes(BasicBlock *&Header); + void splitReturnBlocks(); + void findInputsOutputs(Values &inputs, Values &outputs); + + Function *constructFunction(const Values &inputs, + const Values &outputs, + BasicBlock *header, + BasicBlock *newRootNode, BasicBlock *newHeader, + Function *oldFunction, Module *M); + + void moveCodeToFunction(Function *newFunction); + + void emitCallAndSwitchStatement(Function *newFunction, + BasicBlock *newHeader, + Values &inputs, + Values &outputs); + + }; +} + +/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the +/// region, we need to split the entry block of the region so that the PHI node +/// is easier to deal with. +void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { + bool HasPredsFromRegion = false; + unsigned NumPredsOutsideRegion = 0; + + if (Header != &Header->getParent()->getEntryBlock()) { + PHINode *PN = dyn_cast<PHINode>(Header->begin()); + if (!PN) return; // No PHI nodes. + + // If the header node contains any PHI nodes, check to see if there is more + // than one entry from outside the region. If so, we need to sever the + // header block into two. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (BlocksToExtract.count(PN->getIncomingBlock(i))) + HasPredsFromRegion = true; + else + ++NumPredsOutsideRegion; + + // If there is one (or fewer) predecessor from outside the region, we don't + // need to do anything special. + if (NumPredsOutsideRegion <= 1) return; + } + + // Otherwise, we need to split the header block into two pieces: one + // containing PHI nodes merging values from outside of the region, and a + // second that contains all of the code for the block and merges back any + // incoming values from inside of the region. + BasicBlock::iterator AfterPHIs = Header->begin(); + while (isa<PHINode>(AfterPHIs)) ++AfterPHIs; + BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, + Header->getName()+".ce"); + + // We only want to code extract the second block now, and it becomes the new + // header of the region. + BasicBlock *OldPred = Header; + BlocksToExtract.erase(OldPred); + BlocksToExtract.insert(NewBB); + Header = NewBB; + + // Okay, update dominator sets. The blocks that dominate the new one are the + // blocks that dominate TIBB plus the new block itself. + if (DT) + DT->splitBlock(NewBB); + + // Okay, now we need to adjust the PHI nodes and any branches from within the + // region to go to the new header block instead of the old header block. + if (HasPredsFromRegion) { + PHINode *PN = cast<PHINode>(OldPred->begin()); + // Loop over all of the predecessors of OldPred that are in the region, + // changing them to branch to NewBB instead. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (BlocksToExtract.count(PN->getIncomingBlock(i))) { + TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); + TI->replaceUsesOfWith(OldPred, NewBB); + } + + // Okay, everthing within the region is now branching to the right block, we + // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { + PHINode *PN = cast<PHINode>(AfterPHIs); + // Create a new PHI node in the new region, which has an incoming value + // from OldPred of PN. + PHINode *NewPN = new PHINode(PN->getType(), PN->getName()+".ce", + NewBB->begin()); + NewPN->addIncoming(PN, OldPred); + + // Loop over all of the incoming value in PN, moving them to NewPN if they + // are from the extracted region. + for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { + if (BlocksToExtract.count(PN->getIncomingBlock(i))) { + NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); + PN->removeIncomingValue(i); + --i; + } + } + } + } +} + +void CodeExtractor::splitReturnBlocks() { + for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(), + E = BlocksToExtract.end(); I != E; ++I) + if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) + (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); +} + +// findInputsOutputs - Find inputs to, outputs from the code region. +// +void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) { + std::set<BasicBlock*> ExitBlocks; + for (std::set<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(), + ce = BlocksToExtract.end(); ci != ce; ++ci) { + BasicBlock *BB = *ci; + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // If a used value is defined outside the region, it's an input. If an + // instruction is used outside the region, it's an output. + for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O) + if (definedInCaller(*O)) + inputs.push_back(*O); + + // Consider uses of this instruction (outputs). + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) + if (!definedInRegion(*UI)) { + outputs.push_back(I); + break; + } + } // for: insts + + // Keep track of the exit blocks from the region. + TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (!BlocksToExtract.count(TI->getSuccessor(i))) + ExitBlocks.insert(TI->getSuccessor(i)); + } // for: basic blocks + + NumExitBlocks = ExitBlocks.size(); + + // Eliminate duplicates. + std::sort(inputs.begin(), inputs.end()); + inputs.erase(std::unique(inputs.begin(), inputs.end()), inputs.end()); + std::sort(outputs.begin(), outputs.end()); + outputs.erase(std::unique(outputs.begin(), outputs.end()), outputs.end()); +} + +/// constructFunction - make a function based on inputs and outputs, as follows: +/// f(in0, ..., inN, out0, ..., outN) +/// +Function *CodeExtractor::constructFunction(const Values &inputs, + const Values &outputs, + BasicBlock *header, + BasicBlock *newRootNode, + BasicBlock *newHeader, + Function *oldFunction, + Module *M) { + DOUT << "inputs: " << inputs.size() << "\n"; + DOUT << "outputs: " << outputs.size() << "\n"; + + // This function returns unsigned, outputs will go back by reference. + switch (NumExitBlocks) { + case 0: + case 1: RetTy = Type::VoidTy; break; + case 2: RetTy = Type::Int1Ty; break; + default: RetTy = Type::Int16Ty; break; + } + + std::vector<const Type*> paramTy; + + // Add the types of the input values to the function's argument list + for (Values::const_iterator i = inputs.begin(), + e = inputs.end(); i != e; ++i) { + const Value *value = *i; + DOUT << "value used in func: " << *value << "\n"; + paramTy.push_back(value->getType()); + } + + // Add the types of the output values to the function's argument list. + for (Values::const_iterator I = outputs.begin(), E = outputs.end(); + I != E; ++I) { + DOUT << "instr used in func: " << **I << "\n"; + if (AggregateArgs) + paramTy.push_back((*I)->getType()); + else + paramTy.push_back(PointerType::get((*I)->getType())); + } + + DOUT << "Function type: " << *RetTy << " f("; + for (std::vector<const Type*>::iterator i = paramTy.begin(), + e = paramTy.end(); i != e; ++i) + DOUT << **i << ", "; + DOUT << ")\n"; + + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + PointerType *StructPtr = PointerType::get(StructType::get(paramTy)); + paramTy.clear(); + paramTy.push_back(StructPtr); + } + const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false); + + // Create the new function + Function *newFunction = new Function(funcType, + GlobalValue::InternalLinkage, + oldFunction->getName() + "_" + + header->getName(), M); + newFunction->getBasicBlockList().push_back(newRootNode); + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx0 = Constant::getNullValue(Type::Int32Ty); + Value *Idx1 = ConstantInt::get(Type::Int32Ty, i); + std::string GEPname = "gep_" + inputs[i]->getName(); + TerminatorInst *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = new GetElementPtrInst(AI, Idx0, Idx1, + GEPname, TI); + RewriteVal = new LoadInst(GEP, "load" + GEPname, TI); + } else + RewriteVal = AI++; + + std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end()); + for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end(); + use != useE; ++use) + if (Instruction* inst = dyn_cast<Instruction>(*use)) + if (BlocksToExtract.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } + + // Set names for input and output arguments. + if (!AggregateArgs) { + AI = newFunction->arg_begin(); + for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) + AI->setName(inputs[i]->getName()); + for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) + AI->setName(outputs[i]->getName()+".out"); + } + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector<User*> Users(header->use_begin(), header->use_end()); + for (unsigned i = 0, e = Users.size(); i != e; ++i) + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i])) + if (!BlocksToExtract.count(TI->getParent()) && + TI->getParent()->getParent() == oldFunction) + TI->replaceUsesOfWith(header, newHeader); + + return newFunction; +} + +/// emitCallAndSwitchStatement - This method sets up the caller side by adding +/// the call instruction, splitting any PHI nodes in the header block as +/// necessary. +void CodeExtractor:: +emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, + Values &inputs, Values &outputs) { + // Emit a call to the new function, passing in: *pointer to struct (if + // aggregating parameters), or plan inputs and allocated memory for outputs + std::vector<Value*> params, StructValues, ReloadOutputs; + + // Add inputs as params, or to be filled into the struct + for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) + if (AggregateArgs) + StructValues.push_back(*i); + else + params.push_back(*i); + + // Create allocas for the outputs + for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) { + if (AggregateArgs) { + StructValues.push_back(*i); + } else { + AllocaInst *alloca = + new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc", + codeReplacer->getParent()->begin()->begin()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } + + AllocaInst *Struct = 0; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { + std::vector<const Type*> ArgTypes; + for (Values::iterator v = StructValues.begin(), + ve = StructValues.end(); v != ve; ++v) + ArgTypes.push_back((*v)->getType()); + + // Allocate a struct at the beginning of this function + Type *StructArgTy = StructType::get(ArgTypes); + Struct = + new AllocaInst(StructArgTy, 0, "structArg", + codeReplacer->getParent()->begin()->begin()); + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx0 = Constant::getNullValue(Type::Int32Ty); + Value *Idx1 = ConstantInt::get(Type::Int32Ty, i); + GetElementPtrInst *GEP = + new GetElementPtrInst(Struct, Idx0, Idx1, + "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + StoreInst *SI = new StoreInst(StructValues[i], GEP); + codeReplacer->getInstList().push_back(SI); + } + } + + // Emit the call to the function + CallInst *call = new CallInst(newFunction, ¶ms[0], params.size(), + NumExitBlocks > 1 ? "targetBlock" : ""); + codeReplacer->getInstList().push_back(call); + + Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); + unsigned FirstOut = inputs.size(); + if (!AggregateArgs) + std::advance(OutputArgBegin, inputs.size()); + + // Reload the outputs passed in by reference + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = 0; + if (AggregateArgs) { + Value *Idx0 = Constant::getNullValue(Type::Int32Ty); + Value *Idx1 = ConstantInt::get(Type::Int32Ty, FirstOut + i); + GetElementPtrInst *GEP + = new GetElementPtrInst(Struct, Idx0, Idx1, + "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); + codeReplacer->getInstList().push_back(load); + std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast<Instruction>(Users[u]); + if (!BlocksToExtract.count(inst->getParent())) + inst->replaceUsesOfWith(outputs[i], load); + } + } + + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + new SwitchInst(ConstantInt::getNullValue(Type::Int16Ty), + codeReplacer, 0, codeReplacer); + + // Since there may be multiple exits from the original region, make the new + // function return an unsigned, switch on that number. This loop iterates + // over all of the blocks in the extracted region, updating any terminator + // instructions in the to-be-extracted region that branch to blocks that are + // not in the region to be extracted. + std::map<BasicBlock*, BasicBlock*> ExitBlockMap; + + unsigned switchVal = 0; + for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(), + e = BlocksToExtract.end(); i != e; ++i) { + TerminatorInst *TI = (*i)->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (!BlocksToExtract.count(TI->getSuccessor(i))) { + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + if (!NewTarget) { + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = new BasicBlock(OldTarget->getName() + ".exitStub", + newFunction); + unsigned SuccNum = switchVal++; + + Value *brVal = 0; + switch (NumExitBlocks) { + case 0: + case 1: break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::Int1Ty, !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::Int16Ty, SuccNum); + break; + } + + ReturnInst *NTRet = new ReturnInst(brVal, NewTarget); + + // Update the switch instruction. + TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum), + OldTarget); + + // Restore values just before we exit + Function::arg_iterator OAI = OutputArgBegin; + for (unsigned out = 0, e = outputs.size(); out != e; ++out) { + // For an invoke, the normal destination is the only one that is + // dominated by the result of the invocation + BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent(); + + bool DominatesDef = true; + + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) { + DefBlock = Invoke->getNormalDest(); + + // Make sure we are looking at the original successor block, not + // at a newly inserted exit block, which won't be in the dominator + // info. + for (std::map<BasicBlock*, BasicBlock*>::iterator I = + ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I) + if (DefBlock == I->second) { + DefBlock = I->first; + break; + } + + // In the extract block case, if the block we are extracting ends + // with an invoke instruction, make sure that we don't emit a + // store of the invoke value for the unwind block. + if (!DT && DefBlock != OldTarget) + DominatesDef = false; + } + + if (DT) + DominatesDef = DT->dominates(DefBlock, OldTarget); + + if (DominatesDef) { + if (AggregateArgs) { + Value *Idx0 = Constant::getNullValue(Type::Int32Ty); + Value *Idx1 = ConstantInt::get(Type::Int32Ty,FirstOut+out); + GetElementPtrInst *GEP = + new GetElementPtrInst(OAI, Idx0, Idx1, + "gep_" + outputs[out]->getName(), + NTRet); + new StoreInst(outputs[out], GEP, NTRet); + } else { + new StoreInst(outputs[out], OAI, NTRet); + } + } + // Advance output iterator even if we don't emit a store + if (!AggregateArgs) ++OAI; + } + } + + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } + } + + // Now that we've done the deed, simplify the switch instruction. + const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy == Type::VoidTy) { + new ReturnInst(0, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + new ReturnInst(TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + new ReturnInst(Constant::getNullValue(OldFnRetTy), TheSwitch); + } + + TheSwitch->getParent()->getInstList().erase(TheSwitch); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + new BranchInst(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->getParent()->getInstList().erase(TheSwitch); + break; + case 2: + new BranchInst(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->getParent()->getInstList().erase(TheSwitch); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. + TheSwitch->setOperand(0, call); + TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks)); + TheSwitch->removeCase(NumExitBlocks); // Remove redundant case + break; + } +} + +void CodeExtractor::moveCodeToFunction(Function *newFunction) { + Function *oldFunc = (*BlocksToExtract.begin())->getParent(); + Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); + Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); + + for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(), + e = BlocksToExtract.end(); i != e; ++i) { + // Delete the basic block from the old function, and the list of blocks + oldBlocks.remove(*i); + + // Insert this basic block into the new function + newBlocks.push_back(*i); + } +} + +/// ExtractRegion - Removes a loop from a function, replaces it with a call to +/// new function. Returns pointer to the new function. +/// +/// algorithm: +/// +/// find inputs and outputs for the region +/// +/// for inputs: add to function as args, map input instr* to arg# +/// for outputs: add allocas for scalars, +/// add to func as args, map output instr* to arg# +/// +/// rewrite func to use argument #s instead of instr* +/// +/// for each scalar output in the function: at every exit, store intermediate +/// computed result back into memory. +/// +Function *CodeExtractor:: +ExtractCodeRegion(const std::vector<BasicBlock*> &code) { + if (!isEligible(code)) + return 0; + + // 1) Find inputs, outputs + // 2) Construct new function + // * Add allocas for defs, pass as args by reference + // * Pass in uses as args + // 3) Move code region, add call instr to func + // + BlocksToExtract.insert(code.begin(), code.end()); + + Values inputs, outputs; + + // Assumption: this is a single-entry code region, and the header is the first + // block in the region. + BasicBlock *header = code[0]; + + for (unsigned i = 1, e = code.size(); i != e; ++i) + for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]); + PI != E; ++PI) + assert(BlocksToExtract.count(*PI) && + "No blocks in this region may have entries from outside the region" + " except for the first block!"); + + // If we have to split PHI nodes or the entry block, do so now. + severSplitPHINodes(header); + + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + Function *oldFunction = header->getParent(); + + // This takes place of the original loop + BasicBlock *codeReplacer = new BasicBlock("codeRepl", oldFunction, header); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = new BasicBlock("newFuncRoot"); + newFuncRoot->getInstList().push_back(new BranchInst(header)); + + // Find inputs to, outputs from the code region. + findInputsOutputs(inputs, outputs); + + // Construct new function based on inputs/outputs & add allocas for all defs. + Function *newFunction = constructFunction(inputs, outputs, header, + newFuncRoot, + codeReplacer, oldFunction, + oldFunction->getParent()); + + emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); + + moveCodeToFunction(newFunction); + + // Loop over all of the PHI nodes in the header block, and change any + // references to the old incoming edge to be the new incoming edge. + for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!BlocksToExtract.count(PN->getIncomingBlock(i))) + PN->setIncomingBlock(i, newFuncRoot); + } + + // Look at all successors of the codeReplacer block. If any of these blocks + // had PHI nodes in them, we need to update the "from" block to be the code + // replacer, not the original block in the extracted region. + std::vector<BasicBlock*> Succs(succ_begin(codeReplacer), + succ_end(codeReplacer)); + for (unsigned i = 0, e = Succs.size(); i != e; ++i) + for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + std::set<BasicBlock*> ProcessedPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (BlocksToExtract.count(PN->getIncomingBlock(i))) + if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second) + PN->setIncomingBlock(i, codeReplacer); + else { + // There were multiple entries in the PHI for this block, now there + // is only one, so remove the duplicated entries. + PN->removeIncomingValue(i, false); + --i; --e; + } + } + + //cerr << "NEW FUNCTION: " << *newFunction; + // verifyFunction(*newFunction); + + // cerr << "OLD FUNCTION: " << *oldFunction; + // verifyFunction(*oldFunction); + + DEBUG(if (verifyFunction(*newFunction)) abort()); + return newFunction; +} + +bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) { + // Deny code region if it contains allocas or vastarts. + for (std::vector<BasicBlock*>::const_iterator BB = code.begin(), e=code.end(); + BB != e; ++BB) + for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end(); + I != Ie; ++I) + if (isa<AllocaInst>(*I)) + return false; + else if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (const Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::vastart) + return false; + return true; +} + + +/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new +/// function +/// +Function* llvm::ExtractCodeRegion(DominatorTree &DT, + const std::vector<BasicBlock*> &code, + bool AggregateArgs) { + return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code); +} + +/// ExtractBasicBlock - slurp a natural loop into a brand new function +/// +Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) { + return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks()); +} + +/// ExtractBasicBlock - slurp a basic block into a brand new function +/// +Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) { + std::vector<BasicBlock*> Blocks; + Blocks.push_back(BB); + return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks); +} diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp new file mode 100644 index 0000000000..df332b289d --- /dev/null +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -0,0 +1,133 @@ +//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provide the function DemoteRegToStack(). This function takes a +// virtual register computed by an Instruction and replaces it with a slot in +// the stack frame, allocated via alloca. It returns the pointer to the +// AllocaInst inserted. After this function is called on an instruction, we are +// guaranteed that the only user of the instruction is a store that is +// immediately after it. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include <map> +using namespace llvm; + +/// DemoteRegToStack - This function takes a virtual register computed by an +/// Instruction and replaces it with a slot in the stack frame, allocated via +/// alloca. This allows the CFG to be changed around without fear of +/// invalidating the SSA information for the value. It returns the pointer to +/// the alloca inserted to create a stack slot for I. +/// +AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads) { + if (I.use_empty()) return 0; // nothing to do! + + // Create a stack slot to hold the value. + Function *F = I.getParent()->getParent(); + AllocaInst *Slot = new AllocaInst(I.getType(), 0, I.getName(), + F->getEntryBlock().begin()); + + // Change all of the users of the instruction to read from the stack slot + // instead. + while (!I.use_empty()) { + Instruction *U = cast<Instruction>(I.use_back()); + if (PHINode *PN = dyn_cast<PHINode>(U)) { + // If this is a PHI node, we can't insert a load of the value before the + // use. Instead, insert the load in the predecessor block corresponding + // to the incoming value. + // + // Note that if there are multiple edges from a basic block to this PHI + // node that we cannot multiple loads. The problem is that the resultant + // PHI node will have multiple values (from each load) coming in from the + // same block, which is illegal SSA form. For this reason, we keep track + // and reuse loads we insert. + std::map<BasicBlock*, Value*> Loads; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == &I) { + Value *&V = Loads[PN->getIncomingBlock(i)]; + if (V == 0) { + // Insert the load into the predecessor block + V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, + PN->getIncomingBlock(i)->getTerminator()); + } + PN->setIncomingValue(i, V); + } + + } else { + // If this is a normal instruction, just insert a load. + Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U); + U->replaceUsesOfWith(&I, V); + } + } + + + // Insert stores of the computed value into the stack slot. We have to be + // careful is I is an invoke instruction though, because we can't insert the + // store AFTER the terminator instruction. + BasicBlock::iterator InsertPt; + if (!isa<TerminatorInst>(I)) { + InsertPt = &I; + ++InsertPt; + } else { + // We cannot demote invoke instructions to the stack if their normal edge + // is critical. + InvokeInst &II = cast<InvokeInst>(I); + assert(II.getNormalDest()->getSinglePredecessor() && + "Cannot demote invoke with a critical successor!"); + InsertPt = II.getNormalDest()->begin(); + } + + for (; isa<PHINode>(InsertPt); ++InsertPt) + /* empty */; // Don't insert before any PHI nodes. + new StoreInst(&I, Slot, InsertPt); + + return Slot; +} + + +/// DemotePHIToStack - This function takes a virtual register computed by a phi +/// node and replaces it with a slot in the stack frame, allocated via alloca. +/// The phi node is deleted and it returns the pointer to the alloca inserted. +AllocaInst* llvm::DemotePHIToStack(PHINode *P) { + if (P->use_empty()) { + P->eraseFromParent(); + return 0; + } + + // Create a stack slot to hold the value. + Function *F = P->getParent()->getParent(); + AllocaInst *Slot = new AllocaInst(P->getType(), 0, P->getName(), + F->getEntryBlock().begin()); + + // Iterate over each operand, insert store in each predecessor. + for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { + if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) { + assert(II->getParent() != P->getIncomingBlock(i) && + "Invoke edge not supported yet"); + } + new StoreInst(P->getIncomingValue(i), Slot, + P->getIncomingBlock(i)->getTerminator()); + } + + // Insert load in place of the phi and replace all uses. + BasicBlock::iterator InsertPt; + for (InsertPt = P->getParent()->getInstList().begin(); + isa<PHINode>(InsertPt); ++InsertPt); + Value *V = new LoadInst(Slot, P->getName()+".reload", P); + P->replaceAllUsesWith(V); + + // Delete phi. + P->eraseFromParent(); + + return Slot; +} diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp new file mode 100644 index 0000000000..9735a2fcda --- /dev/null +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -0,0 +1,496 @@ +//===- InlineFunction.cpp - Code to perform function inlining -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inlining of a function into a call site, resolving +// parameters and the return value as appropriate. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CallSite.h" +using namespace llvm; + +bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) { + return InlineFunction(CallSite(CI), CG, TD); +} +bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) { + return InlineFunction(CallSite(II), CG, TD); +} + +/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes and turn unwind +/// instructions into branches to the invoke unwind dest. +/// +/// II is the invoke instruction begin inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. +static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *InvokeDest = II->getUnwindDest(); + std::vector<Value*> InvokeDestPHIValues; + + // If there are PHI nodes in the unwind destination block, we need to + // keep track of which values came into them from this invoke, then remove + // the entry for this block. + BasicBlock *InvokeBlock = II->getParent(); + for (BasicBlock::iterator I = InvokeDest->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + // Save the value to use for this edge. + InvokeDestPHIValues.push_back(PN->getIncomingValueForBlock(InvokeBlock)); + } + + Function *Caller = FirstNewBlock->getParent(); + + // The inlined code is currently at the end of the function, scan from the + // start of the inlined code to its end, checking for stuff we need to + // rewrite. + if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) { + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) { + if (InlinedCodeInfo.ContainsCalls) { + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){ + Instruction *I = BBI++; + + // We only need to check for function calls: inlined invoke + // instructions require no special handling. + if (!isa<CallInst>(I)) continue; + CallInst *CI = cast<CallInst>(I); + + // If this is an intrinsic function call or an inline asm, don't + // convert it to an invoke. + if ((CI->getCalledFunction() && + CI->getCalledFunction()->getIntrinsicID()) || + isa<InlineAsm>(CI->getCalledValue())) + continue; + + // Convert this function call into an invoke instruction. + // First, split the basic block. + BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); + + // Next, create the new invoke instruction, inserting it at the end + // of the old basic block. + SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end()); + InvokeInst *II = + new InvokeInst(CI->getCalledValue(), Split, InvokeDest, + &InvokeArgs[0], InvokeArgs.size(), + CI->getName(), BB->getTerminator()); + II->setCallingConv(CI->getCallingConv()); + + // Make sure that anything using the call now uses the invoke! + CI->replaceAllUsesWith(II); + + // Delete the unconditional branch inserted by splitBasicBlock + BB->getInstList().pop_back(); + Split->getInstList().pop_front(); // Delete the original call + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + unsigned i = 0; + for (BasicBlock::iterator I = InvokeDest->begin(); + isa<PHINode>(I); ++I, ++i) { + PHINode *PN = cast<PHINode>(I); + PN->addIncoming(InvokeDestPHIValues[i], BB); + } + + // This basic block is now complete, start scanning the next one. + break; + } + } + + if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + // An UnwindInst requires special handling when it gets inlined into an + // invoke site. Once this happens, we know that the unwind would cause + // a control transfer to the invoke exception destination, so we can + // transform it into a direct branch to the exception destination. + new BranchInst(InvokeDest, UI); + + // Delete the unwind instruction! + UI->getParent()->getInstList().pop_back(); + + // Update any PHI nodes in the exceptional block to indicate that + // there is now a new entry in them. + unsigned i = 0; + for (BasicBlock::iterator I = InvokeDest->begin(); + isa<PHINode>(I); ++I, ++i) { + PHINode *PN = cast<PHINode>(I); + PN->addIncoming(InvokeDestPHIValues[i], BB); + } + } + } + } + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); +} + +/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee +/// into the caller, update the specified callgraph to reflect the changes we +/// made. Note that it's possible that not all code was copied over, so only +/// some edges of the callgraph will be remain. +static void UpdateCallGraphAfterInlining(const Function *Caller, + const Function *Callee, + Function::iterator FirstNewBlock, + DenseMap<const Value*, Value*> &ValueMap, + CallGraph &CG) { + // Update the call graph by deleting the edge from Callee to Caller + CallGraphNode *CalleeNode = CG[Callee]; + CallGraphNode *CallerNode = CG[Caller]; + CallerNode->removeCallEdgeTo(CalleeNode); + + // Since we inlined some uninlined call sites in the callee into the caller, + // add edges from the caller to all of the callees of the callee. + for (CallGraphNode::iterator I = CalleeNode->begin(), + E = CalleeNode->end(); I != E; ++I) { + const Instruction *OrigCall = I->first.getInstruction(); + + DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall); + // Only copy the edge if the call was inlined! + if (VMI != ValueMap.end() && VMI->second) { + // If the call was inlined, but then constant folded, there is no edge to + // add. Check for this case. + if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second)) + CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); + } + } +} + + +// InlineFunction - This function inlines the called function into the basic +// block of the caller. This returns false if it is not possible to inline this +// call. The program is still in a well defined state if this occurs though. +// +// Note that this only does one level of inlining. For example, if the +// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +// exists in the instruction stream. Similiarly this will inline a recursive +// function by one level. +// +bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { + Instruction *TheCall = CS.getInstruction(); + assert(TheCall->getParent() && TheCall->getParent()->getParent() && + "Instruction not in function!"); + + const Function *CalledFunc = CS.getCalledFunction(); + if (CalledFunc == 0 || // Can't inline external function or indirect + CalledFunc->isDeclaration() || // call, or call to a vararg function! + CalledFunc->getFunctionType()->isVarArg()) return false; + + + // If the call to the callee is a non-tail call, we must clear the 'tail' + // flags on any calls that we inline. + bool MustClearTailCallFlags = + isa<CallInst>(TheCall) && !cast<CallInst>(TheCall)->isTailCall(); + + BasicBlock *OrigBB = TheCall->getParent(); + Function *Caller = OrigBB->getParent(); + + // Get an iterator to the last basic block in the function, which will have + // the new function inlined after it. + // + Function::iterator LastBlock = &Caller->back(); + + // Make sure to capture all of the return instructions from the cloned + // function. + std::vector<ReturnInst*> Returns; + ClonedCodeInfo InlinedFunctionInfo; + Function::iterator FirstNewBlock; + + { // Scope to destroy ValueMap after cloning. + DenseMap<const Value*, Value*> ValueMap; + + // Calculate the vector of arguments to pass into the function cloner, which + // matches up the formal to the actual argument values. + assert(std::distance(CalledFunc->arg_begin(), CalledFunc->arg_end()) == + std::distance(CS.arg_begin(), CS.arg_end()) && + "No varargs calls can be inlined!"); + CallSite::arg_iterator AI = CS.arg_begin(); + for (Function::const_arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); I != E; ++I, ++AI) + ValueMap[I] = *AI; + + // We want the inliner to prune the code as it copies. We would LOVE to + // have no dead or constant instructions leftover after inlining occurs + // (which can happen, e.g., because an argument was constant), but we'll be + // happy with whatever the cloner can do. + CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", + &InlinedFunctionInfo, TD); + + // Remember the first block that is newly cloned over. + FirstNewBlock = LastBlock; ++FirstNewBlock; + + // Update the callgraph if requested. + if (CG) + UpdateCallGraphAfterInlining(Caller, CalledFunc, FirstNewBlock, ValueMap, + *CG); + } + + // If there are any alloca instructions in the block that used to be the entry + // block for the callee, move them to the entry block of the caller. First + // calculate which instruction they should be inserted before. We insert the + // instructions at the end of the current alloca list. + // + { + BasicBlock::iterator InsertPoint = Caller->begin()->begin(); + for (BasicBlock::iterator I = FirstNewBlock->begin(), + E = FirstNewBlock->end(); I != E; ) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) { + // If the alloca is now dead, remove it. This often occurs due to code + // specialization. + if (AI->use_empty()) { + AI->eraseFromParent(); + continue; + } + + if (isa<Constant>(AI->getArraySize())) { + // Scan for the block of allocas that we can move over, and move them + // all at once. + while (isa<AllocaInst>(I) && + isa<Constant>(cast<AllocaInst>(I)->getArraySize())) + ++I; + + // Transfer all of the allocas over in a block. Using splice means + // that the instructions aren't removed from the symbol table, then + // reinserted. + Caller->getEntryBlock().getInstList().splice( + InsertPoint, + FirstNewBlock->getInstList(), + AI, I); + } + } + } + + // If the inlined code contained dynamic alloca instructions, wrap the inlined + // code with llvm.stacksave/llvm.stackrestore intrinsics. + if (InlinedFunctionInfo.ContainsDynamicAllocas) { + Module *M = Caller->getParent(); + const Type *BytePtr = PointerType::get(Type::Int8Ty); + // Get the two intrinsics we care about. + Constant *StackSave, *StackRestore; + StackSave = M->getOrInsertFunction("llvm.stacksave", BytePtr, NULL); + StackRestore = M->getOrInsertFunction("llvm.stackrestore", Type::VoidTy, + BytePtr, NULL); + + // If we are preserving the callgraph, add edges to the stacksave/restore + // functions for the calls we insert. + CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; + if (CG) { + // We know that StackSave/StackRestore are Function*'s, because they are + // intrinsics which must have the right types. + StackSaveCGN = CG->getOrInsertFunction(cast<Function>(StackSave)); + StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore)); + CallerNode = (*CG)[Caller]; + } + + // Insert the llvm.stacksave. + CallInst *SavedPtr = new CallInst(StackSave, "savedstack", + FirstNewBlock->begin()); + if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); + + // Insert a call to llvm.stackrestore before any return instructions in the + // inlined function. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + CallInst *CI = new CallInst(StackRestore, SavedPtr, "", Returns[i]); + if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); + } + + // Count the number of StackRestore calls we insert. + unsigned NumStackRestores = Returns.size(); + + // If we are inlining an invoke instruction, insert restores before each + // unwind. These unwinds will be rewritten into branches later. + if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) { + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) + if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + new CallInst(StackRestore, SavedPtr, "", UI); + ++NumStackRestores; + } + } + } + + // If we are inlining tail call instruction through a call site that isn't + // marked 'tail', we must remove the tail marker for any calls in the inlined + // code. + if (MustClearTailCallFlags && InlinedFunctionInfo.ContainsCalls) { + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); + BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) + CI->setTailCall(false); + } + + // If we are inlining for an invoke instruction, we must make sure to rewrite + // any inlined 'unwind' instructions into branches to the invoke exception + // destination, and call instructions into invoke instructions. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) + HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); + + // If we cloned in _exactly one_ basic block, and if that block ends in a + // return instruction, we splice the body of the inlined callee directly into + // the calling basic block. + if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { + // Move all of the instructions right before the call. + OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), + FirstNewBlock->begin(), FirstNewBlock->end()); + // Remove the cloned basic block. + Caller->getBasicBlockList().pop_back(); + + // If the call site was an invoke instruction, add a branch to the normal + // destination. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) + new BranchInst(II->getNormalDest(), TheCall); + + // If the return instruction returned a value, replace uses of the call with + // uses of the returned value. + if (!TheCall->use_empty()) + TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); + + // Since we are now done with the Call/Invoke, we can delete it. + TheCall->getParent()->getInstList().erase(TheCall); + + // Since we are now done with the return instruction, delete it also. + Returns[0]->getParent()->getInstList().erase(Returns[0]); + + // We are now done with the inlining. + return true; + } + + // Otherwise, we have the normal case, of more than one block to inline or + // multiple return sites. + + // We want to clone the entire callee function into the hole between the + // "starter" and "ender" blocks. How we accomplish this depends on whether + // this is an invoke instruction or a call instruction. + BasicBlock *AfterCallBB; + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + + // Add an unconditional branch to make this look like the CallInst case... + BranchInst *NewBr = new BranchInst(II->getNormalDest(), TheCall); + + // Split the basic block. This guarantees that no PHI nodes will have to be + // updated due to new incoming edges, and make the invoke case more + // symmetric to the call case. + AfterCallBB = OrigBB->splitBasicBlock(NewBr, + CalledFunc->getName()+".exit"); + + } else { // It's a call + // If this is a call instruction, we need to split the basic block that + // the call lives in. + // + AfterCallBB = OrigBB->splitBasicBlock(TheCall, + CalledFunc->getName()+".exit"); + } + + // Change the branch that used to go to AfterCallBB to branch to the first + // basic block of the inlined function. + // + TerminatorInst *Br = OrigBB->getTerminator(); + assert(Br && Br->getOpcode() == Instruction::Br && + "splitBasicBlock broken!"); + Br->setOperand(0, FirstNewBlock); + + + // Now that the function is correct, make it a little bit nicer. In + // particular, move the basic blocks inserted from the end of the function + // into the space made by splitting the source basic block. + // + Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), + FirstNewBlock, Caller->end()); + + // Handle all of the return instructions that we just cloned in, and eliminate + // any users of the original call/invoke instruction. + if (Returns.size() > 1) { + // The PHI node should go at the front of the new basic block to merge all + // possible incoming values. + // + PHINode *PHI = 0; + if (!TheCall->use_empty()) { + PHI = new PHINode(CalledFunc->getReturnType(), + TheCall->getName(), AfterCallBB->begin()); + + // Anything that used the result of the function call should now use the + // PHI node as their operand. + // + TheCall->replaceAllUsesWith(PHI); + } + + // Loop over all of the return instructions, turning them into unconditional + // branches to the merge point now, and adding entries to the PHI node as + // appropriate. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + + if (PHI) { + assert(RI->getReturnValue() && "Ret should have value!"); + assert(RI->getReturnValue()->getType() == PHI->getType() && + "Ret value not consistent in function!"); + PHI->addIncoming(RI->getReturnValue(), RI->getParent()); + } + + // Add a branch to the merge point where the PHI node lives if it exists. + new BranchInst(AfterCallBB, RI); + + // Delete the return instruction now + RI->getParent()->getInstList().erase(RI); + } + + } else if (!Returns.empty()) { + // Otherwise, if there is exactly one return value, just replace anything + // using the return value of the call with the computed value. + if (!TheCall->use_empty()) + TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); + + // Splice the code from the return block into the block that it will return + // to, which contains the code that was after the call. + BasicBlock *ReturnBB = Returns[0]->getParent(); + AfterCallBB->getInstList().splice(AfterCallBB->begin(), + ReturnBB->getInstList()); + + // Update PHI nodes that use the ReturnBB to use the AfterCallBB. + ReturnBB->replaceAllUsesWith(AfterCallBB); + + // Delete the return instruction now and empty ReturnBB now. + Returns[0]->eraseFromParent(); + ReturnBB->eraseFromParent(); + } else if (!TheCall->use_empty()) { + // No returns, but something is using the return value of the call. Just + // nuke the result. + TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); + } + + // Since we are now done with the Call/Invoke, we can delete it. + TheCall->eraseFromParent(); + + // We should always be able to fold the entry block of the function into the + // single predecessor of the block... + assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); + BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); + + // Splice the code entry block into calling block, right before the + // unconditional branch. + OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); + CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes + + // Remove the unconditional branch. + OrigBB->getInstList().erase(Br); + + // Now we can remove the CalleeEntry block, which is now empty. + Caller->getBasicBlockList().erase(CalleeEntry); + + return true; +} diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp new file mode 100644 index 0000000000..220241df33 --- /dev/null +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -0,0 +1,269 @@ +//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Owen Anderson and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass transforms loops by placing phi nodes at the end of the loops for +// all values that are live across the loop boundary. For example, it turns +// the left into the right code: +// +// for (...) for (...) +// if (c) if (c) +// X1 = ... X1 = ... +// else else +// X2 = ... X2 = ... +// X3 = phi(X1, X2) X3 = phi(X1, X2) +// ... = X3 + 4 X4 = phi(X3) +// ... = X4 + 4 +// +// This is still valid LLVM; the extra phi nodes are purely redundant, and will +// be trivially eliminated by InstCombine. The major benefit of this +// transformation is that it makes many other loop optimizations, such as +// LoopUnswitching, simpler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lcssa" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include <algorithm> +#include <map> +using namespace llvm; + +STATISTIC(NumLCSSA, "Number of live out of a loop variables"); + +namespace { + struct VISIBILITY_HIDDEN LCSSA : public LoopPass { + static char ID; // Pass identification, replacement for typeid + LCSSA() : LoopPass((intptr_t)&ID) {} + + // Cached analysis information for the current function. + LoopInfo *LI; + DominatorTree *DT; + std::vector<BasicBlock*> LoopBlocks; + + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + void ProcessInstruction(Instruction* Instr, + const std::vector<BasicBlock*>& exitBlocks); + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. It maintains both of these, + /// as well as the CFG. It also requires dominator information. + /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + AU.addRequired<DominatorTree>(); + AU.addPreserved<ScalarEvolution>(); + } + private: + void getLoopValuesUsedOutsideLoop(Loop *L, + SetVector<Instruction*> &AffectedValues); + + Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst, + std::map<DomTreeNode*, Value*> &Phis); + + /// inLoop - returns true if the given block is within the current loop + const bool inLoop(BasicBlock* B) { + return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B); + } + }; + + char LCSSA::ID = 0; + RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass"); +} + +LoopPass *llvm::createLCSSAPass() { return new LCSSA(); } +const PassInfo *llvm::LCSSAID = X.getPassInfo(); + +/// runOnFunction - Process all loops in the function, inner-most out. +bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) { + + LI = &LPM.getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTree>(); + + // Speed up queries by creating a sorted list of blocks + LoopBlocks.clear(); + LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); + std::sort(LoopBlocks.begin(), LoopBlocks.end()); + + SetVector<Instruction*> AffectedValues; + getLoopValuesUsedOutsideLoop(L, AffectedValues); + + // If no values are affected, we can save a lot of work, since we know that + // nothing will be changed. + if (AffectedValues.empty()) + return false; + + std::vector<BasicBlock*> exitBlocks; + L->getExitBlocks(exitBlocks); + + + // Iterate over all affected values for this loop and insert Phi nodes + // for them in the appropriate exit blocks + + for (SetVector<Instruction*>::iterator I = AffectedValues.begin(), + E = AffectedValues.end(); I != E; ++I) + ProcessInstruction(*I, exitBlocks); + + assert(L->isLCSSAForm()); + + return true; +} + +/// processInstruction - Given a live-out instruction, insert LCSSA Phi nodes, +/// eliminate all out-of-loop uses. +void LCSSA::ProcessInstruction(Instruction *Instr, + const std::vector<BasicBlock*>& exitBlocks) { + ++NumLCSSA; // We are applying the transformation + + // Keep track of the blocks that have the value available already. + std::map<DomTreeNode*, Value*> Phis; + + DomTreeNode *InstrNode = DT->getNode(Instr->getParent()); + + // Insert the LCSSA phi's into the exit blocks (dominated by the value), and + // add them to the Phi's map. + for (std::vector<BasicBlock*>::const_iterator BBI = exitBlocks.begin(), + BBE = exitBlocks.end(); BBI != BBE; ++BBI) { + BasicBlock *BB = *BBI; + DomTreeNode *ExitBBNode = DT->getNode(BB); + Value *&Phi = Phis[ExitBBNode]; + if (!Phi && DT->dominates(InstrNode, ExitBBNode)) { + PHINode *PN = new PHINode(Instr->getType(), Instr->getName()+".lcssa", + BB->begin()); + PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB))); + + // Remember that this phi makes the value alive in this block. + Phi = PN; + + // Add inputs from inside the loop for this PHI. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + PN->addIncoming(Instr, *PI); + } + } + + + // Record all uses of Instr outside the loop. We need to rewrite these. The + // LCSSA phis won't be included because they use the value in the loop. + for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end(); + UI != E;) { + BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(*UI)) { + unsigned OperandNo = UI.getOperandNo(); + UserBB = P->getIncomingBlock(OperandNo/2); + } + + // If the user is in the loop, don't rewrite it! + if (UserBB == Instr->getParent() || inLoop(UserBB)) { + ++UI; + continue; + } + + // Otherwise, patch up uses of the value with the appropriate LCSSA Phi, + // inserting PHI nodes into join points where needed. + Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis); + + // Preincrement the iterator to avoid invalidating it when we change the + // value. + Use &U = UI.getUse(); + ++UI; + U.set(Val); + } +} + +/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that +/// are used by instructions outside of it. +void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L, + SetVector<Instruction*> &AffectedValues) { + // FIXME: For large loops, we may be able to avoid a lot of use-scanning + // by using dominance information. In particular, if a block does not + // dominate any of the loop exits, then none of the values defined in the + // block could be used outside the loop. + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) { + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I) + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); + if (PHINode* p = dyn_cast<PHINode>(*UI)) { + unsigned OperandNo = UI.getOperandNo(); + UserBB = p->getIncomingBlock(OperandNo/2); + } + + if (*BB != UserBB && !inLoop(UserBB)) { + AffectedValues.insert(I); + break; + } + } + } +} + +/// GetValueForBlock - Get the value to use within the specified basic block. +/// available values are in Phis. +Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst, + std::map<DomTreeNode*, Value*> &Phis) { + // If there is no dominator info for this BB, it is unreachable. + if (BB == 0) + return UndefValue::get(OrigInst->getType()); + + // If we have already computed this value, return the previously computed val. + Value *&V = Phis[BB]; + if (V) return V; + + DomTreeNode *IDom = BB->getIDom(); + + // If the block has no dominator, bail + if (!IDom) + return V = UndefValue::get(OrigInst->getType()); + + // Otherwise, there are two cases: we either have to insert a PHI node or we + // don't. We need to insert a PHI node if this block is not dominated by one + // of the exit nodes from the loop (the loop could have multiple exits, and + // though the value defined *inside* the loop dominated all its uses, each + // exit by itself may not dominate all the uses). + // + // The simplest way to check for this condition is by checking to see if the + // idom is in the loop. If so, we *know* that none of the exit blocks + // dominate this block. Note that we *know* that the block defining the + // original instruction is in the idom chain, because if it weren't, then the + // original value didn't dominate this use. + if (!inLoop(IDom->getBlock())) { + // Idom is not in the loop, we must still be "below" the exit block and must + // be fully dominated by the value live in the idom. + return V = GetValueForBlock(IDom, OrigInst, Phis); + } + + BasicBlock *BBN = BB->getBlock(); + + // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so + // now, then get values to fill in the incoming values for the PHI. + PHINode *PN = new PHINode(OrigInst->getType(), OrigInst->getName()+".lcssa", + BBN->begin()); + PN->reserveOperandSpace(std::distance(pred_begin(BBN), pred_end(BBN))); + V = PN; + + // Fill in the incoming values for the block. + for (pred_iterator PI = pred_begin(BBN), E = pred_end(BBN); PI != E; ++PI) + PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI); + return PN; +} + diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp new file mode 100644 index 0000000000..5e2d2375cc --- /dev/null +++ b/lib/Transforms/Utils/Local.cpp @@ -0,0 +1,200 @@ +//===-- Local.cpp - Functions to perform local transformations ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform various local transformations to the +// program. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include <cerrno> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Local constant propagation... +// + +/// doConstantPropagation - If an instruction references constants, try to fold +/// them together... +/// +bool llvm::doConstantPropagation(BasicBlock::iterator &II, + const TargetData *TD) { + if (Constant *C = ConstantFoldInstruction(II, TD)) { + // Replaces all of the uses of a variable with uses of the constant. + II->replaceAllUsesWith(C); + + // Remove the instruction from the basic block... + II = II->getParent()->getInstList().erase(II); + return true; + } + + return false; +} + +// ConstantFoldTerminator - If a terminator instruction is predicated on a +// constant value, convert it into an unconditional branch to the constant +// destination. +// +bool llvm::ConstantFoldTerminator(BasicBlock *BB) { + TerminatorInst *T = BB->getTerminator(); + + // Branch - See if we are conditional jumping on constant + if (BranchInst *BI = dyn_cast<BranchInst>(T)) { + if (BI->isUnconditional()) return false; // Can't optimize uncond branch + BasicBlock *Dest1 = cast<BasicBlock>(BI->getOperand(0)); + BasicBlock *Dest2 = cast<BasicBlock>(BI->getOperand(1)); + + if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { + // Are we branching on constant? + // YES. Change to unconditional branch... + BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; + BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; + + //cerr << "Function: " << T->getParent()->getParent() + // << "\nRemoving branch from " << T->getParent() + // << "\n\nTo: " << OldDest << endl; + + // Let the basic block know that we are letting go of it. Based on this, + // it will adjust it's PHI nodes. + assert(BI->getParent() && "Terminator not inserted in block!"); + OldDest->removePredecessor(BI->getParent()); + + // Set the unconditional destination, and change the insn to be an + // unconditional branch. + BI->setUnconditionalDest(Destination); + return true; + } else if (Dest2 == Dest1) { // Conditional branch to same location? + // This branch matches something like this: + // br bool %cond, label %Dest, label %Dest + // and changes it into: br label %Dest + + // Let the basic block know that we are letting go of one copy of it. + assert(BI->getParent() && "Terminator not inserted in block!"); + Dest1->removePredecessor(BI->getParent()); + + // Change a conditional branch to unconditional. + BI->setUnconditionalDest(Dest1); + return true; + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) { + // If we are switching on a constant, we can convert the switch into a + // single branch instruction! + ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition()); + BasicBlock *TheOnlyDest = SI->getSuccessor(0); // The default dest + BasicBlock *DefaultDest = TheOnlyDest; + assert(TheOnlyDest == SI->getDefaultDest() && + "Default destination is not successor #0?"); + + // Figure out which case it goes to... + for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { + // Found case matching a constant operand? + if (SI->getSuccessorValue(i) == CI) { + TheOnlyDest = SI->getSuccessor(i); + break; + } + + // Check to see if this branch is going to the same place as the default + // dest. If so, eliminate it as an explicit compare. + if (SI->getSuccessor(i) == DefaultDest) { + // Remove this entry... + DefaultDest->removePredecessor(SI->getParent()); + SI->removeCase(i); + --i; --e; // Don't skip an entry... + continue; + } + + // Otherwise, check to see if the switch only branches to one destination. + // We do this by reseting "TheOnlyDest" to null when we find two non-equal + // destinations. + if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0; + } + + if (CI && !TheOnlyDest) { + // Branching on a constant, but not any of the cases, go to the default + // successor. + TheOnlyDest = SI->getDefaultDest(); + } + + // If we found a single destination that we can fold the switch into, do so + // now. + if (TheOnlyDest) { + // Insert the new branch.. + new BranchInst(TheOnlyDest, SI); + BasicBlock *BB = SI->getParent(); + + // Remove entries from PHI nodes which we no longer branch to... + for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + // Found case matching a constant operand? + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == TheOnlyDest) + TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest + else + Succ->removePredecessor(BB); + } + + // Delete the old switch... + BB->getInstList().erase(SI); + return true; + } else if (SI->getNumSuccessors() == 2) { + // Otherwise, we can fold this switch into a conditional branch + // instruction if it has only one non-default destination. + Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(), + SI->getSuccessorValue(1), "cond", SI); + // Insert the new branch... + new BranchInst(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI); + + // Delete the old switch... + SI->getParent()->getInstList().erase(SI); + return true; + } + } + return false; +} + + +//===----------------------------------------------------------------------===// +// Local dead code elimination... +// + +bool llvm::isInstructionTriviallyDead(Instruction *I) { + if (!I->use_empty() || isa<TerminatorInst>(I)) return false; + + if (!I->mayWriteToMemory()) return true; + + if (CallInst *CI = dyn_cast<CallInst>(I)) + if (Function *F = CI->getCalledFunction()) { + unsigned IntrinsicID = F->getIntrinsicID(); +#define GET_SIDE_EFFECT_INFO +#include "llvm/Intrinsics.gen" +#undef GET_SIDE_EFFECT_INFO + } + return false; +} + +// dceInstruction - Inspect the instruction at *BBI and figure out if it's +// [trivially] dead. If so, remove the instruction and update the iterator +// to point to the instruction that immediately succeeded the original +// instruction. +// +bool llvm::dceInstruction(BasicBlock::iterator &BBI) { + // Look for un"used" definitions... + if (isInstructionTriviallyDead(BBI)) { + BBI = BBI->getParent()->getInstList().erase(BBI); // Bye bye + return true; + } + return false; +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp new file mode 100644 index 0000000000..0a5de2b43e --- /dev/null +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -0,0 +1,692 @@ +//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to transform natural loops into a +// simpler form, which makes subsequent analyses and transformations simpler and +// more effective. +// +// Loop pre-header insertion guarantees that there is a single, non-critical +// entry edge from outside of the loop to the loop header. This simplifies a +// number of analyses and transformations, such as LICM. +// +// Loop exit-block insertion guarantees that all exit blocks from the loop +// (blocks which are outside of the loop that have predecessors inside of the +// loop) only have predecessors from inside of the loop (and are thus dominated +// by the loop header). This simplifies transformations such as store-sinking +// that are built into LICM. +// +// This pass also guarantees that loops will have exactly one backedge. +// +// Note that the simplifycfg pass will clean up blocks which are split out but +// end up being unnecessary, so usage of this pass should not pessimize +// generated code. +// +// This pass obviously modifies the CFG, but updates loop information and +// dominator information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loopsimplify" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constant.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/Type.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DepthFirstIterator.h" +using namespace llvm; + +STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); +STATISTIC(NumNested , "Number of nested loops split out"); + +namespace { + struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : FunctionPass((intptr_t)&ID) {} + + // AA - If we have an alias analysis object to update, this is it, otherwise + // this is null. + AliasAnalysis *AA; + LoopInfo *LI; + DominatorTree *DT; + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // We need loop information to identify the loops... + AU.addRequired<LoopInfo>(); + AU.addRequired<DominatorTree>(); + + AU.addPreserved<LoopInfo>(); + AU.addPreserved<DominatorTree>(); + AU.addPreserved<DominanceFrontier>(); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + } + private: + bool ProcessLoop(Loop *L); + BasicBlock *SplitBlockPredecessors(BasicBlock *BB, const char *Suffix, + const std::vector<BasicBlock*> &Preds); + BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); + void InsertPreheaderForLoop(Loop *L); + Loop *SeparateNestedLoop(Loop *L); + void InsertUniqueBackedgeBlock(Loop *L); + void PlaceSplitBlockCarefully(BasicBlock *NewBB, + std::vector<BasicBlock*> &SplitPreds, + Loop *L); + }; + + char LoopSimplify::ID = 0; + RegisterPass<LoopSimplify> + X("loopsimplify", "Canonicalize natural loops", true); +} + +// Publically exposed interface to pass... +const PassInfo *llvm::LoopSimplifyID = X.getPassInfo(); +FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } + +/// runOnFunction - Run down all loops in the CFG (recursively, but we could do +/// it in any convenient order) inserting preheaders... +/// +bool LoopSimplify::runOnFunction(Function &F) { + bool Changed = false; + LI = &getAnalysis<LoopInfo>(); + AA = getAnalysisToUpdate<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + + // Check to see that no blocks (other than the header) in loops have + // predecessors that are not in loops. This is not valid for natural loops, + // but can occur if the blocks are unreachable. Since they are unreachable we + // can just shamelessly destroy their terminators to make them not branch into + // the loop! + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + // This case can only occur for unreachable blocks. Blocks that are + // unreachable can't be in loops, so filter those blocks out. + if (LI->getLoopFor(BB)) continue; + + bool BlockUnreachable = false; + TerminatorInst *TI = BB->getTerminator(); + + // Check to see if any successors of this block are non-loop-header loops + // that are not the header. + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + // If this successor is not in a loop, BB is clearly ok. + Loop *L = LI->getLoopFor(TI->getSuccessor(i)); + if (!L) continue; + + // If the succ is the loop header, and if L is a top-level loop, then this + // is an entrance into a loop through the header, which is also ok. + if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0) + continue; + + // Otherwise, this is an entrance into a loop from some place invalid. + // Either the loop structure is invalid and this is not a natural loop (in + // which case the compiler is buggy somewhere else) or BB is unreachable. + BlockUnreachable = true; + break; + } + + // If this block is ok, check the next one. + if (!BlockUnreachable) continue; + + // Otherwise, this block is dead. To clean up the CFG and to allow later + // loop transformations to ignore this case, we delete the edges into the + // loop by replacing the terminator. + + // Remove PHI entries from the successors. + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + TI->getSuccessor(i)->removePredecessor(BB); + + // Add a new unreachable instruction. + new UnreachableInst(TI); + + // Delete the dead terminator. + if (AA) AA->deleteValue(&BB->back()); + BB->getInstList().pop_back(); + Changed |= true; + } + + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + Changed |= ProcessLoop(*I); + + return Changed; +} + +/// ProcessLoop - Walk the loop structure in depth first order, ensuring that +/// all loops have preheaders. +/// +bool LoopSimplify::ProcessLoop(Loop *L) { + bool Changed = false; +ReprocessLoop: + + // Canonicalize inner loops before outer loops. Inner loop canonicalization + // can provide work for the outer loop to canonicalize. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= ProcessLoop(*I); + + assert(L->getBlocks()[0] == L->getHeader() && + "Header isn't first block in loop?"); + + // Does the loop already have a preheader? If so, don't insert one. + if (L->getLoopPreheader() == 0) { + InsertPreheaderForLoop(L); + NumInserted++; + Changed = true; + } + + // Next, check to make sure that all exit nodes of the loop only have + // predecessors that are inside of the loop. This check guarantees that the + // loop preheader/header will dominate the exit blocks. If the exit block has + // predecessors from outside of the loop, split the edge now. + std::vector<BasicBlock*> ExitBlocks; + L->getExitBlocks(ExitBlocks); + + SetVector<BasicBlock*> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); + for (SetVector<BasicBlock*>::iterator I = ExitBlockSet.begin(), + E = ExitBlockSet.end(); I != E; ++I) { + BasicBlock *ExitBlock = *I; + for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); + PI != PE; ++PI) + // Must be exactly this loop: no subloops, parent loops, or non-loop preds + // allowed. + if (!L->contains(*PI)) { + RewriteLoopExitBlock(L, ExitBlock); + NumInserted++; + Changed = true; + break; + } + } + + // If the header has more than two predecessors at this point (from the + // preheader and from multiple backedges), we must adjust the loop. + unsigned NumBackedges = L->getNumBackEdges(); + if (NumBackedges != 1) { + // If this is really a nested loop, rip it out into a child loop. Don't do + // this for loops with a giant number of backedges, just factor them into a + // common backedge instead. + if (NumBackedges < 8) { + if (Loop *NL = SeparateNestedLoop(L)) { + ++NumNested; + // This is a big restructuring change, reprocess the whole loop. + ProcessLoop(NL); + Changed = true; + // GCC doesn't tail recursion eliminate this. + goto ReprocessLoop; + } + } + + // If we either couldn't, or didn't want to, identify nesting of the loops, + // insert a new block that all backedges target, then make it jump to the + // loop header. + InsertUniqueBackedgeBlock(L); + NumInserted++; + Changed = true; + } + + // Scan over the PHI nodes in the loop header. Since they now have only two + // incoming values (the loop is canonicalized), we may have simplified the PHI + // down to 'X = phi [X, Y]', which should be replaced with 'Y'. + PHINode *PN; + for (BasicBlock::iterator I = L->getHeader()->begin(); + (PN = dyn_cast<PHINode>(I++)); ) + if (Value *V = PN->hasConstantValue()) { + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + } + + return Changed; +} + +/// SplitBlockPredecessors - Split the specified block into two blocks. We want +/// to move the predecessors specified in the Preds list to point to the new +/// block, leaving the remaining predecessors pointing to BB. This method +/// updates the SSA PHINode's, but no other analyses. +/// +BasicBlock *LoopSimplify::SplitBlockPredecessors(BasicBlock *BB, + const char *Suffix, + const std::vector<BasicBlock*> &Preds) { + + // Create new basic block, insert right before the original block... + BasicBlock *NewBB = new BasicBlock(BB->getName()+Suffix, BB->getParent(), BB); + + // The preheader first gets an unconditional branch to the loop header... + BranchInst *BI = new BranchInst(BB, NewBB); + + // For every PHI node in the block, insert a PHI node into NewBB where the + // incoming values from the out of loop edges are moved to NewBB. We have two + // possible cases here. If the loop is dead, we just insert dummy entries + // into the PHI nodes for the new edge. If the loop is not dead, we move the + // incoming edges in BB into new PHI nodes in NewBB. + // + if (!Preds.empty()) { // Is the loop not obviously dead? + // Check to see if the values being merged into the new block need PHI + // nodes. If so, insert them. + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I); + ++I; + + // Check to see if all of the values coming in are the same. If so, we + // don't need to create a new PHI node. + Value *InVal = PN->getIncomingValueForBlock(Preds[0]); + for (unsigned i = 1, e = Preds.size(); i != e; ++i) + if (InVal != PN->getIncomingValueForBlock(Preds[i])) { + InVal = 0; + break; + } + + // If the values coming into the block are not the same, we need a PHI. + if (InVal == 0) { + // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = new PHINode(PN->getType(), PN->getName()+".ph", BI); + if (AA) AA->copyValue(PN, NewPHI); + + // Move all of the edges from blocks outside the loop to the new PHI + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + Value *V = PN->removeIncomingValue(Preds[i], false); + NewPHI->addIncoming(V, Preds[i]); + } + InVal = NewPHI; + } else { + // Remove all of the edges coming into the PHI nodes from outside of the + // block. + for (unsigned i = 0, e = Preds.size(); i != e; ++i) + PN->removeIncomingValue(Preds[i], false); + } + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + + // Can we eliminate this phi node now? + if (Value *V = PN->hasConstantValue(true)) { + Instruction *I = dyn_cast<Instruction>(V); + // If I is in NewBB, the Dominator call will fail, because NewBB isn't + // registered in DominatorTree yet. Handle this case explicitly. + if (!I || (I->getParent() != NewBB && + getAnalysis<DominatorTree>().dominates(I, PN))) { + PN->replaceAllUsesWith(V); + if (AA) AA->deleteValue(PN); + BB->getInstList().erase(PN); + } + } + } + + // Now that the PHI nodes are updated, actually move the edges from + // Preds to point to NewBB instead of BB. + // + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + TerminatorInst *TI = Preds[i]->getTerminator(); + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) + if (TI->getSuccessor(s) == BB) + TI->setSuccessor(s, NewBB); + } + + } else { // Otherwise the loop is dead... + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + // Insert dummy values as the incoming value... + PN->addIncoming(Constant::getNullValue(PN->getType()), NewBB); + } + } + + return NewBB; +} + +/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a +/// preheader, this method is called to insert one. This method has two phases: +/// preheader insertion and analysis updating. +/// +void LoopSimplify::InsertPreheaderForLoop(Loop *L) { + BasicBlock *Header = L->getHeader(); + + // Compute the set of predecessors of the loop that are not in the loop. + std::vector<BasicBlock*> OutsideBlocks; + for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); + PI != PE; ++PI) + if (!L->contains(*PI)) // Coming in from outside the loop? + OutsideBlocks.push_back(*PI); // Keep track of it... + + // Split out the loop pre-header. + BasicBlock *NewBB = + SplitBlockPredecessors(Header, ".preheader", OutsideBlocks); + + + //===--------------------------------------------------------------------===// + // Update analysis results now that we have performed the transformation + // + + // We know that we have loop information to update... update it now. + if (Loop *Parent = L->getParentLoop()) + Parent->addBasicBlockToLoop(NewBB, *LI); + + DT->splitBlock(NewBB); + if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>()) + DF->splitBlock(NewBB); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); +} + +/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit +/// blocks. This method is used to split exit blocks that have predecessors +/// outside of the loop. +BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { + std::vector<BasicBlock*> LoopBlocks; + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) + if (L->contains(*I)) + LoopBlocks.push_back(*I); + + assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); + BasicBlock *NewBB = SplitBlockPredecessors(Exit, ".loopexit", LoopBlocks); + + // Update Loop Information - we know that the new block will be in whichever + // loop the Exit block is in. Note that it may not be in that immediate loop, + // if the successor is some other loop header. In that case, we continue + // walking up the loop tree to find a loop that contains both the successor + // block and the predecessor block. + Loop *SuccLoop = LI->getLoopFor(Exit); + while (SuccLoop && !SuccLoop->contains(L->getHeader())) + SuccLoop = SuccLoop->getParentLoop(); + if (SuccLoop) + SuccLoop->addBasicBlockToLoop(NewBB, *LI); + + // Update Dominator Information + DT->splitBlock(NewBB); + if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>()) + DF->splitBlock(NewBB); + + return NewBB; +} + +/// AddBlockAndPredsToSet - Add the specified block, and all of its +/// predecessors, to the specified set, if it's not already in there. Stop +/// predecessor traversal when we reach StopBlock. +static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, + std::set<BasicBlock*> &Blocks) { + std::vector<BasicBlock *> WorkList; + WorkList.push_back(InputBB); + do { + BasicBlock *BB = WorkList.back(); WorkList.pop_back(); + if (Blocks.insert(BB).second && BB != StopBlock) + // If BB is not already processed and it is not a stop block then + // insert its predecessor in the work list + for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + BasicBlock *WBB = *I; + WorkList.push_back(WBB); + } + } while(!WorkList.empty()); +} + +/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a +/// PHI node that tells us how to partition the loops. +static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, + AliasAnalysis *AA) { + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I); + ++I; + if (Value *V = PN->hasConstantValue()) + if (!isa<Instruction>(V) || DT->dominates(cast<Instruction>(V), PN)) { + // This is a degenerate PHI already, don't modify it! + PN->replaceAllUsesWith(V); + if (AA) AA->deleteValue(PN); + PN->eraseFromParent(); + continue; + } + + // Scan this PHI node looking for a use of the PHI node by itself. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == PN && + L->contains(PN->getIncomingBlock(i))) + // We found something tasty to remove. + return PN; + } + return 0; +} + +// PlaceSplitBlockCarefully - If the block isn't already, move the new block to +// right after some 'outside block' block. This prevents the preheader from +// being placed inside the loop body, e.g. when the loop hasn't been rotated. +void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, + std::vector<BasicBlock*>&SplitPreds, + Loop *L) { + // Check to see if NewBB is already well placed. + Function::iterator BBI = NewBB; --BBI; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + if (&*BBI == SplitPreds[i]) + return; + } + + // If it isn't already after an outside block, move it after one. This is + // always good as it makes the uncond branch from the outside block into a + // fall-through. + + // Figure out *which* outside block to put this after. Prefer an outside + // block that neighbors a BB actually in the loop. + BasicBlock *FoundBB = 0; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + Function::iterator BBI = SplitPreds[i]; + if (++BBI != NewBB->getParent()->end() && + L->contains(BBI)) { + FoundBB = SplitPreds[i]; + break; + } + } + + // If our heuristic for a *good* bb to place this after doesn't find + // anything, just pick something. It's likely better than leaving it within + // the loop. + if (!FoundBB) + FoundBB = SplitPreds[0]; + NewBB->moveAfter(FoundBB); +} + + +/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of +/// them out into a nested loop. This is important for code that looks like +/// this: +/// +/// Loop: +/// ... +/// br cond, Loop, Next +/// ... +/// br cond2, Loop, Out +/// +/// To identify this common case, we look at the PHI nodes in the header of the +/// loop. PHI nodes with unchanging values on one backedge correspond to values +/// that change in the "outer" loop, but not in the "inner" loop. +/// +/// If we are able to separate out a loop, return the new outer loop that was +/// created. +/// +Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA); + if (PN == 0) return 0; // No known way to partition. + + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + std::vector<BasicBlock*> OuterLoopPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + + BasicBlock *Header = L->getHeader(); + BasicBlock *NewBB = SplitBlockPredecessors(Header, ".outer", OuterLoopPreds); + + // Update dominator information + DT->splitBlock(NewBB); + if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>()) + DF->splitBlock(NewBB); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L); + + // Create the new outer loop. + Loop *NewOuter = new Loop(); + + // Change the parent loop to use the outer loop as its child now. + if (Loop *Parent = L->getParentLoop()) + Parent->replaceChildLoopWith(L, NewOuter); + else + LI->changeTopLevelLoop(L, NewOuter); + + // This block is going to be our new header block: add it to this loop and all + // parent loops. + NewOuter->addBasicBlockToLoop(NewBB, *LI); + + // L is now a subloop of our outer loop. + NewOuter->addChildLoop(L); + + for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) + NewOuter->addBlockEntry(L->getBlocks()[i]); + + // Determine which blocks should stay in L and which should be moved out to + // the Outer loop now. + std::set<BasicBlock*> BlocksInL; + for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) + if (DT->dominates(Header, *PI)) + AddBlockAndPredsToSet(*PI, Header, BlocksInL); + + + // Scan all of the loop children of L, moving them to OuterLoop if they are + // not part of the inner loop. + const std::vector<Loop*> &SubLoops = L->getSubLoops(); + for (size_t I = 0; I != SubLoops.size(); ) + if (BlocksInL.count(SubLoops[I]->getHeader())) + ++I; // Loop remains in L + else + NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); + + // Now that we know which blocks are in L and which need to be moved to + // OuterLoop, move any blocks that need it. + for (unsigned i = 0; i != L->getBlocks().size(); ++i) { + BasicBlock *BB = L->getBlocks()[i]; + if (!BlocksInL.count(BB)) { + // Move this block to the parent, updating the exit blocks sets + L->removeBlockFromLoop(BB); + if ((*LI)[BB] == L) + LI->changeLoopFor(BB, NewOuter); + --i; + } + } + + return NewOuter; +} + + + +/// InsertUniqueBackedgeBlock - This method is called when the specified loop +/// has more than one backedge in it. If this occurs, revector all of these +/// backedges to target a new basic block and have that block branch to the loop +/// header. This ensures that loops have exactly one backedge. +/// +void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) { + assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); + + // Get information about the loop + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + Function *F = Header->getParent(); + + // Figure out which basic blocks contain back-edges to the loop header. + std::vector<BasicBlock*> BackedgeBlocks; + for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I) + if (*I != Preheader) BackedgeBlocks.push_back(*I); + + // Create and insert the new backedge block... + BasicBlock *BEBlock = new BasicBlock(Header->getName()+".backedge", F); + BranchInst *BETerminator = new BranchInst(Header, BEBlock); + + // Move the new backedge block to right after the last backedge block. + Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; + F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); + + // Now that the block has been inserted into the function, create PHI nodes in + // the backedge block which correspond to any PHI nodes in the header block. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + PHINode *NewPN = new PHINode(PN->getType(), PN->getName()+".be", + BETerminator); + NewPN->reserveOperandSpace(BackedgeBlocks.size()); + if (AA) AA->copyValue(PN, NewPN); + + // Loop over the PHI node, moving all entries except the one for the + // preheader over to the new PHI node. + unsigned PreheaderIdx = ~0U; + bool HasUniqueIncomingValue = true; + Value *UniqueValue = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *IBB = PN->getIncomingBlock(i); + Value *IV = PN->getIncomingValue(i); + if (IBB == Preheader) { + PreheaderIdx = i; + } else { + NewPN->addIncoming(IV, IBB); + if (HasUniqueIncomingValue) { + if (UniqueValue == 0) + UniqueValue = IV; + else if (UniqueValue != IV) + HasUniqueIncomingValue = false; + } + } + } + + // Delete all of the incoming values from the old PN except the preheader's + assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); + if (PreheaderIdx != 0) { + PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); + PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); + } + // Nuke all entries except the zero'th. + for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) + PN->removeIncomingValue(e-i, false); + + // Finally, add the newly constructed PHI node as the entry for the BEBlock. + PN->addIncoming(NewPN, BEBlock); + + // As an optimization, if all incoming values in the new PhiNode (which is a + // subset of the incoming values of the old PHI node) have the same value, + // eliminate the PHI Node. + if (HasUniqueIncomingValue) { + NewPN->replaceAllUsesWith(UniqueValue); + if (AA) AA->deleteValue(NewPN); + BEBlock->getInstList().erase(NewPN); + } + } + + // Now that all of the PHI nodes have been inserted and adjusted, modify the + // backedge blocks to just to the BEBlock instead of the header. + for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) { + TerminatorInst *TI = BackedgeBlocks[i]->getTerminator(); + for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op) + if (TI->getSuccessor(Op) == Header) + TI->setSuccessor(Op, BEBlock); + } + + //===--- Update all analyses which we must preserve now -----------------===// + + // Update Loop Information - we know that this block is now in the current + // loop and all parent loops. + L->addBasicBlockToLoop(BEBlock, *LI); + + // Update dominator information + DT->splitBlock(BEBlock); + if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>()) + DF->splitBlock(BEBlock); +} + + diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp new file mode 100644 index 0000000000..7ce247909c --- /dev/null +++ b/lib/Transforms/Utils/LowerAllocations.cpp @@ -0,0 +1,176 @@ +//===- LowerAllocations.cpp - Reduce malloc & free insts to calls ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LowerAllocations transformation is a target-dependent tranformation +// because it depends on the size of data types and alignment constraints. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowerallocs" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Constants.h" +#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +STATISTIC(NumLowered, "Number of allocations lowered"); + +namespace { + /// LowerAllocations - Turn malloc and free instructions into %malloc and + /// %free calls. + /// + class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass { + Constant *MallocFunc; // Functions in the module we are processing + Constant *FreeFunc; // Initialized by doInitialization + bool LowerMallocArgToInteger; + public: + static char ID; // Pass ID, replacement for typeid + LowerAllocations(bool LowerToInt = false) + : BasicBlockPass((intptr_t)&ID), MallocFunc(0), FreeFunc(0), + LowerMallocArgToInteger(LowerToInt) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetData>(); + AU.setPreservesCFG(); + + // This is a cluster of orthogonal Transforms: + AU.addPreserved<UnifyFunctionExitNodes>(); + AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreservedID(LowerSelectID); + AU.addPreservedID(LowerSwitchID); + AU.addPreservedID(LowerInvokePassID); + } + + /// doPassInitialization - For the lower allocations pass, this ensures that + /// a module contains a declaration for a malloc and a free function. + /// + bool doInitialization(Module &M); + + virtual bool doInitialization(Function &F) { + return BasicBlockPass::doInitialization(F); + } + + /// runOnBasicBlock - This method does the actual work of converting + /// instructions over, assuming that the pass has already been initialized. + /// + bool runOnBasicBlock(BasicBlock &BB); + }; + + char LowerAllocations::ID = 0; + RegisterPass<LowerAllocations> + X("lowerallocs", "Lower allocations from instructions to calls"); +} + +// Publically exposed interface to pass... +const PassInfo *llvm::LowerAllocationsID = X.getPassInfo(); +// createLowerAllocationsPass - Interface to this file... +Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) { + return new LowerAllocations(LowerMallocArgToInteger); +} + + +// doInitialization - For the lower allocations pass, this ensures that a +// module contains a declaration for a malloc and a free function. +// +// This function is always successful. +// +bool LowerAllocations::doInitialization(Module &M) { + const Type *BPTy = PointerType::get(Type::Int8Ty); + // Prototype malloc as "char* malloc(...)", because we don't know in + // doInitialization whether size_t is int or long. + FunctionType *FT = FunctionType::get(BPTy, std::vector<const Type*>(), true); + MallocFunc = M.getOrInsertFunction("malloc", FT); + FreeFunc = M.getOrInsertFunction("free" , Type::VoidTy, BPTy, (Type *)0); + return true; +} + +// runOnBasicBlock - This method does the actual work of converting +// instructions over, assuming that the pass has already been initialized. +// +bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + assert(MallocFunc && FreeFunc && "Pass not initialized!"); + + BasicBlock::InstListType &BBIL = BB.getInstList(); + + const TargetData &TD = getAnalysis<TargetData>(); + const Type *IntPtrTy = TD.getIntPtrType(); + + // Loop over all of the instructions, looking for malloc or free instructions + for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { + if (MallocInst *MI = dyn_cast<MallocInst>(I)) { + const Type *AllocTy = MI->getType()->getElementType(); + + // malloc(type) becomes sbyte *malloc(size) + Value *MallocArg; + if (LowerMallocArgToInteger) + MallocArg = ConstantInt::get(Type::Int64Ty, TD.getTypeSize(AllocTy)); + else + MallocArg = ConstantExpr::getSizeOf(AllocTy); + MallocArg = ConstantExpr::getTruncOrBitCast(cast<Constant>(MallocArg), + IntPtrTy); + + if (MI->isArrayAllocation()) { + if (isa<ConstantInt>(MallocArg) && + cast<ConstantInt>(MallocArg)->isOne()) { + MallocArg = MI->getOperand(0); // Operand * 1 = Operand + } else if (Constant *CO = dyn_cast<Constant>(MI->getOperand(0))) { + CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/); + MallocArg = ConstantExpr::getMul(CO, cast<Constant>(MallocArg)); + } else { + Value *Scale = MI->getOperand(0); + if (Scale->getType() != IntPtrTy) + Scale = CastInst::createIntegerCast(Scale, IntPtrTy, false /*ZExt*/, + "", I); + + // Multiply it by the array size if necessary... + MallocArg = BinaryOperator::create(Instruction::Mul, Scale, + MallocArg, "", I); + } + } + + // Create the call to Malloc. + CallInst *MCall = new CallInst(MallocFunc, MallocArg, "", I); + MCall->setTailCall(); + + // Create a cast instruction to convert to the right type... + Value *MCast; + if (MCall->getType() != Type::VoidTy) + MCast = new BitCastInst(MCall, MI->getType(), "", I); + else + MCast = Constant::getNullValue(MI->getType()); + + // Replace all uses of the old malloc inst with the cast inst + MI->replaceAllUsesWith(MCast); + I = --BBIL.erase(I); // remove and delete the malloc instr... + Changed = true; + ++NumLowered; + } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) { + Value *PtrCast = new BitCastInst(FI->getOperand(0), + PointerType::get(Type::Int8Ty), "", I); + + // Insert a call to the free function... + (new CallInst(FreeFunc, PtrCast, "", I))->setTailCall(); + + // Delete the old free instruction + I = --BBIL.erase(I); + Changed = true; + ++NumLowered; + } + } + + return Changed; +} + diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp new file mode 100644 index 0000000000..d72c018a22 --- /dev/null +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -0,0 +1,585 @@ +//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which do not yet +// support stack unwinding. This pass supports two models of exception handling +// lowering, the 'cheap' support and the 'expensive' support. +// +// 'Cheap' exception handling support gives the program the ability to execute +// any program which does not "throw an exception", by turning 'invoke' +// instructions into calls and by turning 'unwind' instructions into calls to +// abort(). If the program does dynamically use the unwind instruction, the +// program will print a message then abort. +// +// 'Expensive' exception handling support gives the full exception handling +// support to the program at the cost of making the 'invoke' instruction +// really expensive. It basically inserts setjmp/longjmp calls to emulate the +// exception handling as necessary. +// +// Because the 'expensive' support slows down programs a lot, and EH is only +// used for a subset of the programs, it must be specifically enabled by an +// option. +// +// Note that after this pass runs the CFG is not entirely accurate (exceptional +// control flow edges are not correct anymore) so only very simple things should +// be done after the lowerinvoke pass has run (like generation of native code). +// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't +// support the invoke instruction yet" lowering pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowerinvoke" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetLowering.h" +#include <csetjmp> +#include <set> +using namespace llvm; + +STATISTIC(NumInvokes, "Number of invokes replaced"); +STATISTIC(NumUnwinds, "Number of unwinds replaced"); +STATISTIC(NumSpilled, "Number of registers live across unwind edges"); + +static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support", + cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code")); + +namespace { + class VISIBILITY_HIDDEN LowerInvoke : public FunctionPass { + // Used for both models. + Constant *WriteFn; + Constant *AbortFn; + Value *AbortMessage; + unsigned AbortMessageLength; + + // Used for expensive EH support. + const Type *JBLinkTy; + GlobalVariable *JBListHead; + Constant *SetJmpFn, *LongJmpFn; + + // We peek in TLI to grab the target's jmp_buf size and alignment + const TargetLowering *TLI; + + public: + static char ID; // Pass identification, replacement for typeid + LowerInvoke(const TargetLowering *tli = NULL) : FunctionPass((intptr_t)&ID), + TLI(tli) { } + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // This is a cluster of orthogonal Transforms + AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreservedID(LowerSelectID); + AU.addPreservedID(LowerSwitchID); + AU.addPreservedID(LowerAllocationsID); + } + + private: + void createAbortMessage(Module *M); + void writeAbortMessage(Instruction *IB); + bool insertCheapEHSupport(Function &F); + void splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes); + void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, + AllocaInst *InvokeNum, SwitchInst *CatchSwitch); + bool insertExpensiveEHSupport(Function &F); + }; + + char LowerInvoke::ID = 0; + RegisterPass<LowerInvoke> + X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators"); +} + +const PassInfo *llvm::LowerInvokePassID = X.getPassInfo(); + +// Public Interface To the LowerInvoke pass. +FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { + return new LowerInvoke(TLI); +} + +// doInitialization - Make sure that there is a prototype for abort in the +// current module. +bool LowerInvoke::doInitialization(Module &M) { + const Type *VoidPtrTy = PointerType::get(Type::Int8Ty); + AbortMessage = 0; + if (ExpensiveEHSupport) { + // Insert a type for the linked list of jump buffers. + unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; + JBSize = JBSize ? JBSize : 200; + const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); + + { // The type is recursive, so use a type holder. + std::vector<const Type*> Elements; + Elements.push_back(JmpBufTy); + OpaqueType *OT = OpaqueType::get(); + Elements.push_back(PointerType::get(OT)); + PATypeHolder JBLType(StructType::get(Elements)); + OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle. + JBLinkTy = JBLType.get(); + M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy); + } + + const Type *PtrJBList = PointerType::get(JBLinkTy); + + // Now that we've done that, insert the jmpbuf list head global, unless it + // already exists. + if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) { + JBListHead = new GlobalVariable(PtrJBList, false, + GlobalValue::LinkOnceLinkage, + Constant::getNullValue(PtrJBList), + "llvm.sjljeh.jblist", &M); + } + SetJmpFn = M.getOrInsertFunction("llvm.setjmp", Type::Int32Ty, + PointerType::get(JmpBufTy), (Type *)0); + LongJmpFn = M.getOrInsertFunction("llvm.longjmp", Type::VoidTy, + PointerType::get(JmpBufTy), + Type::Int32Ty, (Type *)0); + } + + // We need the 'write' and 'abort' functions for both models. + AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0); +#if 0 // "write" is Unix-specific.. code is going away soon anyway. + WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty, + VoidPtrTy, Type::Int32Ty, (Type *)0); +#else + WriteFn = 0; +#endif + return true; +} + +void LowerInvoke::createAbortMessage(Module *M) { + if (ExpensiveEHSupport) { + // The abort message for expensive EH support tells the user that the + // program 'unwound' without an 'invoke' instruction. + Constant *Msg = + ConstantArray::get("ERROR: Exception thrown, but not caught!\n"); + AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 + + GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, + GlobalValue::InternalLinkage, + Msg, "abortmsg", M); + std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); + AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); + } else { + // The abort message for cheap EH support tells the user that EH is not + // enabled. + Constant *Msg = + ConstantArray::get("Exception handler needed, but not enabled. Recompile" + " program with -enable-correct-eh-support.\n"); + AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 + + GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, + GlobalValue::InternalLinkage, + Msg, "abortmsg", M); + std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); + AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); + } +} + + +void LowerInvoke::writeAbortMessage(Instruction *IB) { +#if 0 + if (AbortMessage == 0) + createAbortMessage(IB->getParent()->getParent()->getParent()); + + // These are the arguments we WANT... + Value* Args[3]; + Args[0] = ConstantInt::get(Type::Int32Ty, 2); + Args[1] = AbortMessage; + Args[2] = ConstantInt::get(Type::Int32Ty, AbortMessageLength); + (new CallInst(WriteFn, Args, 3, "", IB))->setTailCall(); +#endif +} + +bool LowerInvoke::insertCheapEHSupport(Function &F) { + bool Changed = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end()); + // Insert a normal call instruction... + CallInst *NewCall = new CallInst(II->getCalledValue(), + &CallArgs[0], CallArgs.size(), "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + II->replaceAllUsesWith(NewCall); + + // Insert an unconditional branch to the normal destination. + new BranchInst(II->getNormalDest(), II); + + // Remove any PHI node entries from the exception destination. + II->getUnwindDest()->removePredecessor(BB); + + // Remove the invoke instruction now. + BB->getInstList().erase(II); + + ++NumInvokes; Changed = true; + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + // Insert a new call to write(2, AbortMessage, AbortMessageLength); + writeAbortMessage(UI); + + // Insert a call to abort() + (new CallInst(AbortFn, "", UI))->setTailCall(); + + // Insert a return instruction. This really should be a "barrier", as it + // is unreachable. + new ReturnInst(F.getReturnType() == Type::VoidTy ? 0 : + Constant::getNullValue(F.getReturnType()), UI); + + // Remove the unwind instruction now. + BB->getInstList().erase(UI); + + ++NumUnwinds; Changed = true; + } + return Changed; +} + +/// rewriteExpensiveInvoke - Insert code and hack the function to replace the +/// specified invoke instruction with a call. +void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, + AllocaInst *InvokeNum, + SwitchInst *CatchSwitch) { + ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo); + + // Insert a store of the invoke num before the invoke and store zero into the + // location afterward. + new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile + + BasicBlock::iterator NI = II->getNormalDest()->begin(); + while (isa<PHINode>(NI)) ++NI; + // nonvolatile. + new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI); + + // Add a switch case to our unwind block. + CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); + + // Insert a normal call instruction. + std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end()); + CallInst *NewCall = new CallInst(II->getCalledValue(), + &CallArgs[0], CallArgs.size(), "", + II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + II->replaceAllUsesWith(NewCall); + + // Replace the invoke with an uncond branch. + new BranchInst(II->getNormalDest(), NewCall->getParent()); + II->eraseFromParent(); +} + +/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until +/// we reach blocks we've already seen. +static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { + if (!LiveBBs.insert(BB).second) return; // already been here. + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + MarkBlocksLiveIn(*PI, LiveBBs); +} + +// First thing we need to do is scan the whole function for values that are +// live across unwind edges. Each value that is live across an unwind edge +// we spill into a stack location, guaranteeing that there is nothing live +// across the unwind edge. This process also splits all critical edges +// coming out of invoke's. +void LowerInvoke:: +splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { + // First step, split all critical edges from invoke instructions. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + InvokeInst *II = Invokes[i]; + SplitCriticalEdge(II, 0, this); + SplitCriticalEdge(II, 1, this); + assert(!isa<PHINode>(II->getNormalDest()) && + !isa<PHINode>(II->getUnwindDest()) && + "critical edge splitting left single entry phi nodes?"); + } + + Function *F = Invokes.back()->getParent()->getParent(); + + // To avoid having to handle incoming arguments specially, we lower each arg + // to a copy instruction in the entry block. This ensures that the argument + // value itself cannot be live across the entry block. + BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); + while (isa<AllocaInst>(AfterAllocaInsertPt) && + isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) + ++AfterAllocaInsertPt; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI) { + // This is always a no-op cast because we're casting AI to AI->getType() so + // src and destination types are identical. BitCast is the only possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Normally its is forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, we're + // replacing it here with the same value it was constructed with to simply + // make NC its user. + NC->setOperand(0, AI); + } + + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->use_back())->getParent() == BB && + !isa<PHINode>(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + std::vector<Instruction*> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User->getParent() != BB || isa<PHINode>(User)) + Users.push_back(User); + } + + // Scan all of the uses and see if the live range is live across an unwind + // edge. If we find a use live across an invoke edge, create an alloca + // and spill the value. + std::set<InvokeInst*> InvokesWithStoreInserted; + + // Find all of the blocks that this value is live in. + std::set<BasicBlock*> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it. + if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } +} + +bool LowerInvoke::insertExpensiveEHSupport(Function &F) { + std::vector<ReturnInst*> Returns; + std::vector<UnwindInst*> Unwinds; + std::vector<InvokeInst*> Invokes; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + // Remember all return instructions in case we insert an invoke into this + // function. + Returns.push_back(RI); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Invokes.push_back(II); + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + Unwinds.push_back(UI); + } + + if (Unwinds.empty() && Invokes.empty()) return false; + + NumInvokes += Invokes.size(); + NumUnwinds += Unwinds.size(); + + // TODO: This is not an optimal way to do this. In particular, this always + // inserts setjmp calls into the entries of functions with invoke instructions + // even though there are possibly paths through the function that do not + // execute any invokes. In particular, for functions with early exits, e.g. + // the 'addMove' method in hexxagon, it would be nice to not have to do the + // setjmp stuff on the early exit path. This requires a bit of dataflow, but + // would not be too hard to do. + + // If we have an invoke instruction, insert a setjmp that dominates all + // invokes. After the setjmp, use a cond branch that goes to the original + // code path on zero, and to a designated 'catch' block of nonzero. + Value *OldJmpBufPtr = 0; + if (!Invokes.empty()) { + // First thing we need to do is scan the whole function for values that are + // live across unwind edges. Each value that is live across an unwind edge + // we spill into a stack location, guaranteeing that there is nothing live + // across the unwind edge. This process also splits all critical edges + // coming out of invoke's. + splitLiveRangesLiveAcrossInvokes(Invokes); + + BasicBlock *EntryBB = F.begin(); + + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an + // alloca because the value needs to be live across invokes. + unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; + AllocaInst *JmpBuf = + new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin()); + + std::vector<Value*> Idx; + Idx.push_back(Constant::getNullValue(Type::Int32Ty)); + Idx.push_back(ConstantInt::get(Type::Int32Ty, 1)); + OldJmpBufPtr = new GetElementPtrInst(JmpBuf, &Idx[0], 2, "OldBuf", + EntryBB->getTerminator()); + + // Copy the JBListHead to the alloca. + Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, + EntryBB->getTerminator()); + new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator()); + + // Add the new jumpbuf to the list. + new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator()); + + // Create the catch block. The catch block is basically a big switch + // statement that goes to all of the invoke catch blocks. + BasicBlock *CatchBB = new BasicBlock("setjmp.catch", &F); + + // Create an alloca which keeps track of which invoke is currently + // executing. For normal calls it contains zero. + AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum", + EntryBB->begin()); + new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true, + EntryBB->getTerminator()); + + // Insert a load in the Catch block, and a switch on its value. By default, + // we go to a block that just does an unwind (which is the correct action + // for a standard call). + BasicBlock *UnwindBB = new BasicBlock("unwindbb", &F); + Unwinds.push_back(new UnwindInst(UnwindBB)); + + Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); + SwitchInst *CatchSwitch = + new SwitchInst(CatchLoad, UnwindBB, Invokes.size(), CatchBB); + + // Now that things are set up, insert the setjmp call itself. + + // Split the entry block to insert the conditional branch for the setjmp. + BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "setjmp.cont"); + + Idx[1] = ConstantInt::get(Type::Int32Ty, 0); + Value *JmpBufPtr = new GetElementPtrInst(JmpBuf, &Idx[0], Idx.size(), + "TheJmpBuf", + EntryBB->getTerminator()); + Value *SJRet = new CallInst(SetJmpFn, JmpBufPtr, "sjret", + EntryBB->getTerminator()); + + // Compare the return value to zero. + Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet, + Constant::getNullValue(SJRet->getType()), + "notunwind", EntryBB->getTerminator()); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. + new BranchInst(ContBlock, CatchBB, IsNormal, EntryBB); + + // At this point, we are all set up, rewrite each invoke instruction. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, CatchSwitch); + } + + // We know that there is at least one unwind. + + // Create three new blocks, the block to load the jmpbuf ptr and compare + // against null, the block to do the longjmp, and the error block for if it + // is null. Add them at the end of the function because they are not hot. + BasicBlock *UnwindHandler = new BasicBlock("dounwind", &F); + BasicBlock *UnwindBlock = new BasicBlock("unwind", &F); + BasicBlock *TermBlock = new BasicBlock("unwinderror", &F); + + // If this function contains an invoke, restore the old jumpbuf ptr. + Value *BufPtr; + if (OldJmpBufPtr) { + // Before the return, insert a copy from the saved value to the new value. + BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler); + new StoreInst(BufPtr, JBListHead, UnwindHandler); + } else { + BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler); + } + + // Load the JBList, if it's null, then there was no catch! + Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr, + Constant::getNullValue(BufPtr->getType()), + "notnull", UnwindHandler); + new BranchInst(UnwindBlock, TermBlock, NotNull, UnwindHandler); + + // Create the block to do the longjmp. + // Get a pointer to the jmpbuf and longjmp. + std::vector<Value*> Idx; + Idx.push_back(Constant::getNullValue(Type::Int32Ty)); + Idx.push_back(ConstantInt::get(Type::Int32Ty, 0)); + Idx[0] = new GetElementPtrInst(BufPtr, &Idx[0], 2, "JmpBuf", UnwindBlock); + Idx[1] = ConstantInt::get(Type::Int32Ty, 1); + new CallInst(LongJmpFn, &Idx[0], Idx.size(), "", UnwindBlock); + new UnreachableInst(UnwindBlock); + + // Set up the term block ("throw without a catch"). + new UnreachableInst(TermBlock); + + // Insert a new call to write(2, AbortMessage, AbortMessageLength); + writeAbortMessage(TermBlock->getTerminator()); + + // Insert a call to abort() + (new CallInst(AbortFn, "", + TermBlock->getTerminator()))->setTailCall(); + + + // Replace all unwinds with a branch to the unwind handler. + for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { + new BranchInst(UnwindHandler, Unwinds[i]); + Unwinds[i]->eraseFromParent(); + } + + // Finally, for any returns from this function, if this function contains an + // invoke, restore the old jmpbuf pointer to its input value. + if (OldJmpBufPtr) { + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *R = Returns[i]; + + // Before the return, insert a copy from the saved value to the new value. + Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R); + new StoreInst(OldBuf, JBListHead, true, R); + } + } + + return true; +} + +bool LowerInvoke::runOnFunction(Function &F) { + if (ExpensiveEHSupport) + return insertExpensiveEHSupport(F); + else + return insertCheapEHSupport(F); +} diff --git a/lib/Transforms/Utils/LowerSelect.cpp b/lib/Transforms/Utils/LowerSelect.cpp new file mode 100644 index 0000000000..1882695d07 --- /dev/null +++ b/lib/Transforms/Utils/LowerSelect.cpp @@ -0,0 +1,105 @@ +//===- LowerSelect.cpp - Transform select insts to branches ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers select instructions into conditional branches for targets +// that do not have conditional moves or that have not implemented the select +// instruction yet. +// +// Note that this pass could be improved. In particular it turns every select +// instruction into a new conditional branch, even though some common cases have +// select instructions on the same predicate next to each other. It would be +// better to use the same branch for the whole group of selects. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +namespace { + /// LowerSelect - Turn select instructions into conditional branches. + /// + class VISIBILITY_HIDDEN LowerSelect : public FunctionPass { + bool OnlyFP; // Only lower FP select instructions? + public: + static char ID; // Pass identification, replacement for typeid + LowerSelect(bool onlyfp = false) : FunctionPass((intptr_t)&ID), + OnlyFP(onlyfp) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // This certainly destroys the CFG. + // This is a cluster of orthogonal Transforms: + AU.addPreserved<UnifyFunctionExitNodes>(); + AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreservedID(LowerSwitchID); + AU.addPreservedID(LowerInvokePassID); + AU.addPreservedID(LowerAllocationsID); + } + + bool runOnFunction(Function &F); + }; + + char LowerSelect::ID = 0; + RegisterPass<LowerSelect> + X("lowerselect", "Lower select instructions to branches"); +} + +// Publically exposed interface to pass... +const PassInfo *llvm::LowerSelectID = X.getPassInfo(); +//===----------------------------------------------------------------------===// +// This pass converts SelectInst instructions into conditional branch and PHI +// instructions. If the OnlyFP flag is set to true, then only floating point +// select instructions are lowered. +// +FunctionPass *llvm::createLowerSelectPass(bool OnlyFP) { + return new LowerSelect(OnlyFP); +} + + +bool LowerSelect::runOnFunction(Function &F) { + bool Changed = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (SelectInst *SI = dyn_cast<SelectInst>(I)) + if (!OnlyFP || SI->getType()->isFloatingPoint()) { + // Split this basic block in half right before the select instruction. + BasicBlock *NewCont = + BB->splitBasicBlock(I, BB->getName()+".selectcont"); + + // Make the true block, and make it branch to the continue block. + BasicBlock *NewTrue = new BasicBlock(BB->getName()+".selecttrue", + BB->getParent(), NewCont); + new BranchInst(NewCont, NewTrue); + + // Make the unconditional branch in the incoming block be a + // conditional branch on the select predicate. + BB->getInstList().erase(BB->getTerminator()); + new BranchInst(NewTrue, NewCont, SI->getCondition(), BB); + + // Create a new PHI node in the cont block with the entries we need. + PHINode *PN = new PHINode(SI->getType(), "", NewCont->begin()); + PN->takeName(SI); + PN->addIncoming(SI->getTrueValue(), NewTrue); + PN->addIncoming(SI->getFalseValue(), BB); + + // Use the PHI instead of the select. + SI->replaceAllUsesWith(PN); + NewCont->getInstList().erase(SI); + + Changed = true; + break; // This block is done with. + } + } + return Changed; +} diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp new file mode 100644 index 0000000000..633633ddc5 --- /dev/null +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -0,0 +1,324 @@ +//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LowerSwitch transformation rewrites switch statements with a sequence of +// branches, which allows targets to get away with not implementing the switch +// statement until it is convenient. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include <algorithm> +using namespace llvm; + +namespace { + /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch + /// instructions. Note that this cannot be a BasicBlock pass because it + /// modifies the CFG! + class VISIBILITY_HIDDEN LowerSwitch : public FunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + LowerSwitch() : FunctionPass((intptr_t) &ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // This is a cluster of orthogonal Transforms + AU.addPreserved<UnifyFunctionExitNodes>(); + AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreservedID(LowerSelectID); + AU.addPreservedID(LowerInvokePassID); + AU.addPreservedID(LowerAllocationsID); + } + + struct CaseRange { + Constant* Low; + Constant* High; + BasicBlock* BB; + + CaseRange() : Low(0), High(0), BB(0) { } + CaseRange(Constant* low, Constant* high, BasicBlock* bb) : + Low(low), High(high), BB(bb) { } + }; + + typedef std::vector<CaseRange> CaseVector; + typedef std::vector<CaseRange>::iterator CaseItr; + private: + void processSwitchInst(SwitchInst *SI); + + BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val, + BasicBlock* OrigBlock, BasicBlock* Default); + BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val, + BasicBlock* OrigBlock, BasicBlock* Default); + unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); + }; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator () (const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { + + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + + char LowerSwitch::ID = 0; + RegisterPass<LowerSwitch> + X("lowerswitch", "Lower SwitchInst's to branches"); +} + +// Publically exposed interface to pass... +const PassInfo *llvm::LowerSwitchID = X.getPassInfo(); +// createLowerSwitchPass - Interface to this file... +FunctionPass *llvm::createLowerSwitchPass() { + return new LowerSwitch(); +} + +bool LowerSwitch::runOnFunction(Function &F) { + bool Changed = false; + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks + + if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) { + Changed = true; + processSwitchInst(SI); + } + } + + return Changed; +} + +// operator<< - Used for debugging purposes. +// +static std::ostream& operator<<(std::ostream &O, + const LowerSwitch::CaseVector &C) { + O << "["; + + for (LowerSwitch::CaseVector::const_iterator B = C.begin(), + E = C.end(); B != E; ) { + O << *B->Low << " -" << *B->High; + if (++B != E) O << ", "; + } + + return O << "]"; +} + +static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) { + if (O.stream()) *O.stream() << C; + return O; +} + +// switchConvert - Convert the switch statement into a binary lookup of +// the case values. The function recursively builds this tree. +// +BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, + Value* Val, BasicBlock* OrigBlock, + BasicBlock* Default) +{ + unsigned Size = End - Begin; + + if (Size == 1) + return newLeafBlock(*Begin, Val, OrigBlock, Default); + + unsigned Mid = Size / 2; + std::vector<CaseRange> LHS(Begin, Begin + Mid); + DOUT << "LHS: " << LHS << "\n"; + std::vector<CaseRange> RHS(Begin + Mid, End); + DOUT << "RHS: " << RHS << "\n"; + + CaseRange& Pivot = *(Begin + Mid); + DEBUG( DOUT << "Pivot ==> " + << cast<ConstantInt>(Pivot.Low)->getValue().toStringSigned(10) + << " -" + << cast<ConstantInt>(Pivot.High)->getValue().toStringSigned(10) + << "\n"); + + BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val, + OrigBlock, Default); + BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val, + OrigBlock, Default); + + // Create a new node that checks if the value is < pivot. Go to the + // left branch if it is and right branch if not. + Function* F = OrigBlock->getParent(); + BasicBlock* NewNode = new BasicBlock("NodeBlock"); + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewNode); + + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); + NewNode->getInstList().push_back(Comp); + new BranchInst(LBranch, RBranch, Comp, NewNode); + return NewNode; +} + +// newLeafBlock - Create a new leaf block for the binary lookup tree. It +// checks if the switch's value == the case's value. If not, then it +// jumps to the default branch. At this point in the tree, the value +// can't be another valid case value, so the jump to the "default" branch +// is warranted. +// +BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, + BasicBlock* OrigBlock, + BasicBlock* Default) +{ + Function* F = OrigBlock->getParent(); + BasicBlock* NewLeaf = new BasicBlock("LeafBlock"); + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewLeaf); + + // Emit comparison + ICmpInst* Comp = NULL; + if (Leaf.Low == Leaf.High) { + // Make the seteq instruction... + Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low, + "SwitchLeaf", NewLeaf); + } else { + // Make range comparison + if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) { + // Val >= Min && Val <= Hi --> Val <= Hi + Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf", NewLeaf); + } else if (cast<ConstantInt>(Leaf.Low)->isZero()) { + // Val >= 0 && Val <= Hi --> Val <=u Hi + Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf", NewLeaf); + } else { + // Emit V-Lo <=u Hi-Lo + Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); + Instruction* Add = BinaryOperator::createAdd(Val, NegLo, + Val->getName()+".off", + NewLeaf); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); + Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf", NewLeaf); + } + } + + // Make the conditional branch... + BasicBlock* Succ = Leaf.BB; + new BranchInst(Succ, Default, Comp, NewLeaf); + + // If there were any PHI nodes in this successor, rewrite one entry + // from OrigBlock to come from NewLeaf. + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode* PN = cast<PHINode>(I); + // Remove all but one incoming entries from the cluster + uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() - + cast<ConstantInt>(Leaf.Low)->getSExtValue(); + for (uint64_t j = 0; j < Range; ++j) { + PN->removeIncomingValue(OrigBlock); + } + + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); + } + + return NewLeaf; +} + +// Clusterify - Transform simple list of Cases into list of CaseRange's +unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { + unsigned numCmps = 0; + + // Start with "simple" cases + for (unsigned i = 1; i < SI->getNumSuccessors(); ++i) + Cases.push_back(CaseRange(SI->getSuccessorValue(i), + SI->getSuccessorValue(i), + SI->getSuccessor(i))); + sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size()>=2) + for (CaseItr I=Cases.begin(), J=++(Cases.begin()), E=Cases.end(); J!=E; ) { + int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); + int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); + BasicBlock* nextBB = J->BB; + BasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue-currentValue==1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; + } + + return numCmps; +} + +// processSwitchInst - Replace the specified switch instruction with a sequence +// of chained if-then insts in a balanced binary search. +// +void LowerSwitch::processSwitchInst(SwitchInst *SI) { + BasicBlock *CurBlock = SI->getParent(); + BasicBlock *OrigBlock = CurBlock; + Function *F = CurBlock->getParent(); + Value *Val = SI->getOperand(0); // The value we are switching on... + BasicBlock* Default = SI->getDefaultDest(); + + // If there is only the default destination, don't bother with the code below. + if (SI->getNumOperands() == 2) { + new BranchInst(SI->getDefaultDest(), CurBlock); + CurBlock->getInstList().erase(SI); + return; + } + + // Create a new, empty default block so that the new hierarchy of + // if-then statements go to this and the PHI nodes are happy. + BasicBlock* NewDefault = new BasicBlock("NewDefault"); + F->getBasicBlockList().insert(Default, NewDefault); + + new BranchInst(Default, NewDefault); + + // If there is an entry in any PHI nodes for the default edge, make sure + // to update them as well. + for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + int BlockIdx = PN->getBasicBlockIndex(OrigBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); + } + + // Prepare cases vector. + CaseVector Cases; + unsigned numCmps = Clusterify(Cases, SI); + + DOUT << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << "\n"; + DOUT << "Cases: " << Cases << "\n"; + + BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, + OrigBlock, NewDefault); + + // Branch to our shiny new if-then stuff... + new BranchInst(SwitchBlock, OrigBlock); + + // We are now done with the switch instruction, delete it. + CurBlock->getInstList().erase(SI); +} diff --git a/lib/Transforms/Utils/Makefile b/lib/Transforms/Utils/Makefile new file mode 100644 index 0000000000..26fc4261f1 --- /dev/null +++ b/lib/Transforms/Utils/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/Utils/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by the LLVM research group and is distributed under +# the University of Illinois Open Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMTransformUtils +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp new file mode 100644 index 0000000000..d67b3dea60 --- /dev/null +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -0,0 +1,93 @@ +//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is a simple pass wrapper around the PromoteMemToReg function call +// exposed by the Utils library. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mem2reg" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +STATISTIC(NumPromoted, "Number of alloca's promoted"); + +namespace { + struct VISIBILITY_HIDDEN PromotePass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + PromotePass() : FunctionPass((intptr_t)&ID) {} + + // runOnFunction - To run this pass, first we calculate the alloca + // instructions that are safe for promotion, then we promote each one. + // + virtual bool runOnFunction(Function &F); + + // getAnalysisUsage - We need dominance frontiers + // + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.addRequired<DominanceFrontier>(); + AU.setPreservesCFG(); + // This is a cluster of orthogonal Transforms + AU.addPreserved<UnifyFunctionExitNodes>(); + AU.addPreservedID(LowerSelectID); + AU.addPreservedID(LowerSwitchID); + AU.addPreservedID(LowerInvokePassID); + AU.addPreservedID(LowerAllocationsID); + } + }; + + char PromotePass::ID = 0; + RegisterPass<PromotePass> X("mem2reg", "Promote Memory to Register"); +} // end of anonymous namespace + +bool PromotePass::runOnFunction(Function &F) { + std::vector<AllocaInst*> Allocas; + + BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function + + bool Changed = false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + DominanceFrontier &DF = getAnalysis<DominanceFrontier>(); + + while (1) { + Allocas.clear(); + + // Find allocas that are safe to promote, by looking at all instructions in + // the entry node + for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? + if (isAllocaPromotable(AI)) + Allocas.push_back(AI); + + if (Allocas.empty()) break; + + PromoteMemToReg(Allocas, DT, DF); + NumPromoted += Allocas.size(); + Changed = true; + } + + return Changed; +} + +// Publically exposed interface to pass... +const PassInfo *llvm::PromoteMemoryToRegisterID = X.getPassInfo(); +// createPromoteMemoryToRegister - Provide an entry point to create this pass. +// +FunctionPass *llvm::createPromoteMemoryToRegisterPass() { + return new PromotePass(); +} diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp new file mode 100644 index 0000000000..259a5a249e --- /dev/null +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -0,0 +1,835 @@ +//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file promote memory references to be register references. It promotes +// alloca instructions which only have loads and stores as uses. An alloca is +// transformed by using dominator frontiers to place PHI nodes, then traversing +// the function in depth-first order to rewrite loads and stores as appropriate. +// This is just the standard SSA construction algorithm to construct "pruned" +// SSA form. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include <algorithm> +using namespace llvm; + +// Provide DenseMapKeyInfo for all pointers. +namespace llvm { +template<> +struct DenseMapKeyInfo<std::pair<BasicBlock*, unsigned> > { + static inline std::pair<BasicBlock*, unsigned> getEmptyKey() { + return std::make_pair((BasicBlock*)-1, ~0U); + } + static inline std::pair<BasicBlock*, unsigned> getTombstoneKey() { + return std::make_pair((BasicBlock*)-2, 0U); + } + static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { + return DenseMapKeyInfo<void*>::getHashValue(Val.first) + Val.second*2; + } + static bool isPod() { return true; } +}; +} + +/// isAllocaPromotable - Return true if this alloca is legal for promotion. +/// This is true if there are only loads and stores to the alloca. +/// +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // FIXME: If the memory unit is of pointer or integer type, we can permit + // assignments to subsections of the memory unit. + + // Only allow direct loads and stores... + for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) // Loop over all of the uses of the alloca + if (isa<LoadInst>(*UI)) { + // noop + } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + if (SI->getOperand(0) == AI) + return false; // Don't allow a store OF the AI, only INTO the AI. + } else { + return false; // Not a load or store. + } + + return true; +} + +namespace { + + // Data package used by RenamePass() + class VISIBILITY_HIDDEN RenamePassData { + public: + RenamePassData(BasicBlock *B, BasicBlock *P, + const std::vector<Value *> &V) : BB(B), Pred(P), Values(V) {} + BasicBlock *BB; + BasicBlock *Pred; + std::vector<Value *> Values; + }; + + struct VISIBILITY_HIDDEN PromoteMem2Reg { + /// Allocas - The alloca instructions being promoted. + /// + std::vector<AllocaInst*> Allocas; + SmallVector<AllocaInst*, 16> &RetryList; + DominatorTree &DT; + DominanceFrontier &DF; + + /// AST - An AliasSetTracker object to update. If null, don't update it. + /// + AliasSetTracker *AST; + + /// AllocaLookup - Reverse mapping of Allocas. + /// + std::map<AllocaInst*, unsigned> AllocaLookup; + + /// NewPhiNodes - The PhiNodes we're adding. + /// + DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes; + + /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas + /// it corresponds to. + DenseMap<PHINode*, unsigned> PhiToAllocaMap; + + /// PointerAllocaValues - If we are updating an AliasSetTracker, then for + /// each alloca that is of pointer type, we keep track of what to copyValue + /// to the inserted PHI nodes here. + /// + std::vector<Value*> PointerAllocaValues; + + /// Visited - The set of basic blocks the renamer has already visited. + /// + SmallPtrSet<BasicBlock*, 16> Visited; + + /// BBNumbers - Contains a stable numbering of basic blocks to avoid + /// non-determinstic behavior. + DenseMap<BasicBlock*, unsigned> BBNumbers; + + /// RenamePassWorkList - Worklist used by RenamePass() + std::vector<RenamePassData> RenamePassWorkList; + + public: + PromoteMem2Reg(const std::vector<AllocaInst*> &A, + SmallVector<AllocaInst*, 16> &Retry, DominatorTree &dt, + DominanceFrontier &df, AliasSetTracker *ast) + : Allocas(A), RetryList(Retry), DT(dt), DF(df), AST(ast) {} + + void run(); + + /// properlyDominates - Return true if I1 properly dominates I2. + /// + bool properlyDominates(Instruction *I1, Instruction *I2) const { + if (InvokeInst *II = dyn_cast<InvokeInst>(I1)) + I1 = II->getNormalDest()->begin(); + return DT.properlyDominates(I1->getParent(), I2->getParent()); + } + + /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. + /// + bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { + return DT.dominates(BB1, BB2); + } + + private: + void MarkDominatingPHILive(BasicBlock *BB, unsigned AllocaNum, + SmallPtrSet<PHINode*, 16> &DeadPHINodes); + bool PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI); + void PromoteLocallyUsedAllocas(BasicBlock *BB, + const std::vector<AllocaInst*> &AIs); + + void RenamePass(BasicBlock *BB, BasicBlock *Pred, + std::vector<Value*> &IncVals); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version, + SmallPtrSet<PHINode*, 16> &InsertedPHINodes); + }; + +} // end of anonymous namespace + +void PromoteMem2Reg::run() { + Function &F = *DF.getRoot()->getParent(); + + // LocallyUsedAllocas - Keep track of all of the alloca instructions which are + // only used in a single basic block. These instructions can be efficiently + // promoted by performing a single linear scan over that one block. Since + // individual basic blocks are sometimes large, we group together all allocas + // that are live in a single basic block by the basic block they are live in. + std::map<BasicBlock*, std::vector<AllocaInst*> > LocallyUsedAllocas; + + if (AST) PointerAllocaValues.resize(Allocas.size()); + + for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { + AllocaInst *AI = Allocas[AllocaNum]; + + assert(isAllocaPromotable(AI) && + "Cannot promote non-promotable alloca!"); + assert(AI->getParent()->getParent() == &F && + "All allocas should be in the same function, which is same as DF!"); + + if (AI->use_empty()) { + // If there are no uses of the alloca, just delete it now. + if (AST) AST->deleteValue(AI); + AI->eraseFromParent(); + + // Remove the alloca from the Allocas list, since it has been processed + Allocas[AllocaNum] = Allocas.back(); + Allocas.pop_back(); + --AllocaNum; + continue; + } + + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + std::vector<BasicBlock*> DefiningBlocks; + std::vector<BasicBlock*> UsingBlocks; + + StoreInst *OnlyStore = 0; + BasicBlock *OnlyBlock = 0; + bool OnlyUsedInOneBlock = true; + + // As we scan the uses of the alloca instruction, keep track of stores, and + // decide whether all of the loads and stores to the alloca are within the + // same basic block. + Value *AllocaPointerVal = 0; + for (Value::use_iterator U =AI->use_begin(), E = AI->use_end(); U != E;++U){ + Instruction *User = cast<Instruction>(*U); + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + AllocaPointerVal = SI->getOperand(0); + OnlyStore = SI; + } else { + LoadInst *LI = cast<LoadInst>(User); + // Otherwise it must be a load instruction, keep track of variable reads + UsingBlocks.push_back(LI->getParent()); + AllocaPointerVal = LI; + } + + if (OnlyUsedInOneBlock) { + if (OnlyBlock == 0) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) + OnlyUsedInOneBlock = false; + } + } + + // If the alloca is only read and written in one basic block, just perform a + // linear sweep over the block to eliminate it. + if (OnlyUsedInOneBlock) { + LocallyUsedAllocas[OnlyBlock].push_back(AI); + + // Remove the alloca from the Allocas list, since it will be processed. + Allocas[AllocaNum] = Allocas.back(); + Allocas.pop_back(); + --AllocaNum; + continue; + } + + // If there is only a single store to this value, replace any loads of + // it that are directly dominated by the definition with the value stored. + if (DefiningBlocks.size() == 1) { + // Be aware of loads before the store. + std::set<BasicBlock*> ProcessedBlocks; + for (unsigned i = 0, e = UsingBlocks.size(); i != e; ++i) + // If the store dominates the block and if we haven't processed it yet, + // do so now. + if (dominates(OnlyStore->getParent(), UsingBlocks[i])) + if (ProcessedBlocks.insert(UsingBlocks[i]).second) { + BasicBlock *UseBlock = UsingBlocks[i]; + + // If the use and store are in the same block, do a quick scan to + // verify that there are no uses before the store. + if (UseBlock == OnlyStore->getParent()) { + BasicBlock::iterator I = UseBlock->begin(); + for (; &*I != OnlyStore; ++I) { // scan block for store. + if (isa<LoadInst>(I) && I->getOperand(0) == AI) + break; + } + if (&*I != OnlyStore) break; // Do not handle this case. + } + + // Otherwise, if this is a different block or if all uses happen + // after the store, do a simple linear scan to replace loads with + // the stored value. + for (BasicBlock::iterator I = UseBlock->begin(),E = UseBlock->end(); + I != E; ) { + if (LoadInst *LI = dyn_cast<LoadInst>(I++)) { + if (LI->getOperand(0) == AI) { + LI->replaceAllUsesWith(OnlyStore->getOperand(0)); + if (AST && isa<PointerType>(LI->getType())) + AST->deleteValue(LI); + LI->eraseFromParent(); + } + } + } + + // Finally, remove this block from the UsingBlock set. + UsingBlocks[i] = UsingBlocks.back(); + --i; --e; + } + + // Finally, after the scan, check to see if the store is all that is left. + if (UsingBlocks.empty()) { + // The alloca has been processed, move on. + Allocas[AllocaNum] = Allocas.back(); + Allocas.pop_back(); + --AllocaNum; + continue; + } + } + + + if (AST) + PointerAllocaValues[AllocaNum] = AllocaPointerVal; + + // If we haven't computed a numbering for the BB's in the function, do so + // now. + if (BBNumbers.empty()) { + unsigned ID = 0; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + BBNumbers[I] = ID++; + } + + // Compute the locations where PhiNodes need to be inserted. Look at the + // dominance frontier of EACH basic-block we have a write in. + // + unsigned CurrentVersion = 0; + SmallPtrSet<PHINode*, 16> InsertedPHINodes; + std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks; + while (!DefiningBlocks.empty()) { + BasicBlock *BB = DefiningBlocks.back(); + DefiningBlocks.pop_back(); + + // Look up the DF for this write, add it to PhiNodes + DominanceFrontier::const_iterator it = DF.find(BB); + if (it != DF.end()) { + const DominanceFrontier::DomSetType &S = it->second; + + // In theory we don't need the indirection through the DFBlocks vector. + // In practice, the order of calling QueuePhiNode would depend on the + // (unspecified) ordering of basic blocks in the dominance frontier, + // which would give PHI nodes non-determinstic subscripts. Fix this by + // processing blocks in order of the occurance in the function. + for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), + PE = S.end(); P != PE; ++P) + DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); + + // Sort by which the block ordering in the function. + std::sort(DFBlocks.begin(), DFBlocks.end()); + + for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { + BasicBlock *BB = DFBlocks[i].second; + if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes)) + DefiningBlocks.push_back(BB); + } + DFBlocks.clear(); + } + } + + // Now that we have inserted PHI nodes along the Iterated Dominance Frontier + // of the writes to the variable, scan through the reads of the variable, + // marking PHI nodes which are actually necessary as alive (by removing them + // from the InsertedPHINodes set). This is not perfect: there may PHI + // marked alive because of loads which are dominated by stores, but there + // will be no unmarked PHI nodes which are actually used. + // + for (unsigned i = 0, e = UsingBlocks.size(); i != e; ++i) + MarkDominatingPHILive(UsingBlocks[i], AllocaNum, InsertedPHINodes); + UsingBlocks.clear(); + + // If there are any PHI nodes which are now known to be dead, remove them! + for (SmallPtrSet<PHINode*, 16>::iterator I = InsertedPHINodes.begin(), + E = InsertedPHINodes.end(); I != E; ++I) { + PHINode *PN = *I; + bool Erased=NewPhiNodes.erase(std::make_pair(PN->getParent(), AllocaNum)); + Erased=Erased; + assert(Erased && "PHI already removed?"); + + if (AST && isa<PointerType>(PN->getType())) + AST->deleteValue(PN); + PN->eraseFromParent(); + PhiToAllocaMap.erase(PN); + } + + // Keep the reverse mapping of the 'Allocas' array. + AllocaLookup[Allocas[AllocaNum]] = AllocaNum; + } + + // Process all allocas which are only used in a single basic block. + for (std::map<BasicBlock*, std::vector<AllocaInst*> >::iterator I = + LocallyUsedAllocas.begin(), E = LocallyUsedAllocas.end(); I != E; ++I){ + const std::vector<AllocaInst*> &LocAllocas = I->second; + assert(!LocAllocas.empty() && "empty alloca list??"); + + // It's common for there to only be one alloca in the list. Handle it + // efficiently. + if (LocAllocas.size() == 1) { + // If we can do the quick promotion pass, do so now. + if (PromoteLocallyUsedAlloca(I->first, LocAllocas[0])) + RetryList.push_back(LocAllocas[0]); // Failed, retry later. + } else { + // Locally promote anything possible. Note that if this is unable to + // promote a particular alloca, it puts the alloca onto the Allocas vector + // for global processing. + PromoteLocallyUsedAllocas(I->first, LocAllocas); + } + } + + if (Allocas.empty()) + return; // All of the allocas must have been trivial! + + // Set the incoming values for the basic block to be null values for all of + // the alloca's. We do this in case there is a load of a value that has not + // been stored yet. In this case, it will get this null value. + // + std::vector<Value *> Values(Allocas.size()); + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) + Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); + + // Walks all basic blocks in the function performing the SSA rename algorithm + // and inserting the phi nodes we marked as necessary + // + RenamePassWorkList.clear(); + RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); + while(!RenamePassWorkList.empty()) { + RenamePassData RPD = RenamePassWorkList.back(); + RenamePassWorkList.pop_back(); + // RenamePass may add new worklist entries. + RenamePass(RPD.BB, RPD.Pred, RPD.Values); + } + + // The renamer uses the Visited set to avoid infinite loops. Clear it now. + Visited.clear(); + + // Remove the allocas themselves from the function. + for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { + Instruction *A = Allocas[i]; + + // If there are any uses of the alloca instructions left, they must be in + // sections of dead code that were not processed on the dominance frontier. + // Just delete the users now. + // + if (!A->use_empty()) + A->replaceAllUsesWith(UndefValue::get(A->getType())); + if (AST) AST->deleteValue(A); + A->eraseFromParent(); + } + + + // Loop over all of the PHI nodes and see if there are any that we can get + // rid of because they merge all of the same incoming values. This can + // happen due to undef values coming into the PHI nodes. This process is + // iterative, because eliminating one PHI node can cause others to be removed. + bool EliminatedAPHI = true; + while (EliminatedAPHI) { + EliminatedAPHI = false; + + for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = + NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { + PHINode *PN = I->second; + + // If this PHI node merges one value and/or undefs, get the value. + if (Value *V = PN->hasConstantValue(true)) { + if (!isa<Instruction>(V) || + properlyDominates(cast<Instruction>(V), PN)) { + if (AST && isa<PointerType>(PN->getType())) + AST->deleteValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + NewPhiNodes.erase(I++); + EliminatedAPHI = true; + continue; + } + } + ++I; + } + } + + // At this point, the renamer has added entries to PHI nodes for all reachable + // code. Unfortunately, there may be unreachable blocks which the renamer + // hasn't traversed. If this is the case, the PHI nodes may not + // have incoming values for all predecessors. Loop over all PHI nodes we have + // created, inserting undef values if they are missing any incoming values. + // + for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = + NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { + // We want to do this once per basic block. As such, only process a block + // when we find the PHI that is the first entry in the block. + PHINode *SomePHI = I->second; + BasicBlock *BB = SomePHI->getParent(); + if (&BB->front() != SomePHI) + continue; + + // Count the number of preds for BB. + SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); + + // Only do work here if there the PHI nodes are missing incoming values. We + // know that all PHI nodes that were inserted in a block will have the same + // number of incoming values, so we can just check any of them. + if (SomePHI->getNumIncomingValues() == Preds.size()) + continue; + + // Ok, now we know that all of the PHI nodes are missing entries for some + // basic blocks. Start by sorting the incoming predecessors for efficient + // access. + std::sort(Preds.begin(), Preds.end()); + + // Now we loop through all BB's which have entries in SomePHI and remove + // them from the Preds list. + for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { + // Do a log(n) search of the Preds list for the entry we want. + SmallVector<BasicBlock*, 16>::iterator EntIt = + std::lower_bound(Preds.begin(), Preds.end(), + SomePHI->getIncomingBlock(i)); + assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& + "PHI node has entry for a block which is not a predecessor!"); + + // Remove the entry + Preds.erase(EntIt); + } + + // At this point, the blocks left in the preds list must have dummy + // entries inserted into every PHI nodes for the block. Update all the phi + // nodes in this block that we are inserting (there could be phis before + // mem2reg runs). + unsigned NumBadPreds = SomePHI->getNumIncomingValues(); + BasicBlock::iterator BBI = BB->begin(); + while ((SomePHI = dyn_cast<PHINode>(BBI++)) && + SomePHI->getNumIncomingValues() == NumBadPreds) { + Value *UndefVal = UndefValue::get(SomePHI->getType()); + for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) + SomePHI->addIncoming(UndefVal, Preds[pred]); + } + } + + NewPhiNodes.clear(); +} + +// MarkDominatingPHILive - Mem2Reg wants to construct "pruned" SSA form, not +// "minimal" SSA form. To do this, it inserts all of the PHI nodes on the IDF +// as usual (inserting the PHI nodes in the DeadPHINodes set), then processes +// each read of the variable. For each block that reads the variable, this +// function is called, which removes used PHI nodes from the DeadPHINodes set. +// After all of the reads have been processed, any PHI nodes left in the +// DeadPHINodes set are removed. +// +void PromoteMem2Reg::MarkDominatingPHILive(BasicBlock *BB, unsigned AllocaNum, + SmallPtrSet<PHINode*, 16> &DeadPHINodes) { + // Scan the immediate dominators of this block looking for a block which has a + // PHI node for Alloca num. If we find it, mark the PHI node as being alive! + DomTreeNode *IDomNode = DT.getNode(BB); + for (DomTreeNode *IDom = IDomNode; IDom; IDom = IDom->getIDom()) { + BasicBlock *DomBB = IDom->getBlock(); + DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator + I = NewPhiNodes.find(std::make_pair(DomBB, AllocaNum)); + if (I != NewPhiNodes.end()) { + // Ok, we found an inserted PHI node which dominates this value. + PHINode *DominatingPHI = I->second; + + // Find out if we previously thought it was dead. If so, mark it as being + // live by removing it from the DeadPHINodes set. + if (DeadPHINodes.erase(DominatingPHI)) { + // Now that we have marked the PHI node alive, also mark any PHI nodes + // which it might use as being alive as well. + for (pred_iterator PI = pred_begin(DomBB), PE = pred_end(DomBB); + PI != PE; ++PI) + MarkDominatingPHILive(*PI, AllocaNum, DeadPHINodes); + } + } + } +} + +/// PromoteLocallyUsedAlloca - Many allocas are only used within a single basic +/// block. If this is the case, avoid traversing the CFG and inserting a lot of +/// potentially useless PHI nodes by just performing a single linear pass over +/// the basic block using the Alloca. +/// +/// If we cannot promote this alloca (because it is read before it is written), +/// return true. This is necessary in cases where, due to control flow, the +/// alloca is potentially undefined on some control flow paths. e.g. code like +/// this is potentially correct: +/// +/// for (...) { if (c) { A = undef; undef = B; } } +/// +/// ... so long as A is not used before undef is set. +/// +bool PromoteMem2Reg::PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI) { + assert(!AI->use_empty() && "There are no uses of the alloca!"); + + // Handle degenerate cases quickly. + if (AI->hasOneUse()) { + Instruction *U = cast<Instruction>(AI->use_back()); + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Must be a load of uninitialized value. + LI->replaceAllUsesWith(UndefValue::get(AI->getAllocatedType())); + if (AST && isa<PointerType>(LI->getType())) + AST->deleteValue(LI); + } else { + // Otherwise it must be a store which is never read. + assert(isa<StoreInst>(U)); + } + BB->getInstList().erase(U); + } else { + // Uses of the uninitialized memory location shall get undef. + Value *CurVal = 0; + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = I++; + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + if (LI->getOperand(0) == AI) { + if (!CurVal) return true; // Could not locally promote! + + // Loads just returns the "current value"... + LI->replaceAllUsesWith(CurVal); + if (AST && isa<PointerType>(LI->getType())) + AST->deleteValue(LI); + BB->getInstList().erase(LI); + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->getOperand(1) == AI) { + // Store updates the "current value"... + CurVal = SI->getOperand(0); + BB->getInstList().erase(SI); + } + } + } + } + + // After traversing the basic block, there should be no more uses of the + // alloca, remove it now. + assert(AI->use_empty() && "Uses of alloca from more than one BB??"); + if (AST) AST->deleteValue(AI); + AI->getParent()->getInstList().erase(AI); + return false; +} + +/// PromoteLocallyUsedAllocas - This method is just like +/// PromoteLocallyUsedAlloca, except that it processes multiple alloca +/// instructions in parallel. This is important in cases where we have large +/// basic blocks, as we don't want to rescan the entire basic block for each +/// alloca which is locally used in it (which might be a lot). +void PromoteMem2Reg:: +PromoteLocallyUsedAllocas(BasicBlock *BB, const std::vector<AllocaInst*> &AIs) { + std::map<AllocaInst*, Value*> CurValues; + for (unsigned i = 0, e = AIs.size(); i != e; ++i) + CurValues[AIs[i]] = 0; // Insert with null value + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = I++; + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + // Is this a load of an alloca we are tracking? + if (AllocaInst *AI = dyn_cast<AllocaInst>(LI->getOperand(0))) { + std::map<AllocaInst*, Value*>::iterator AIt = CurValues.find(AI); + if (AIt != CurValues.end()) { + // If loading an uninitialized value, allow the inter-block case to + // handle it. Due to control flow, this might actually be ok. + if (AIt->second == 0) { // Use of locally uninitialized value?? + RetryList.push_back(AI); // Retry elsewhere. + CurValues.erase(AIt); // Stop tracking this here. + if (CurValues.empty()) return; + } else { + // Loads just returns the "current value"... + LI->replaceAllUsesWith(AIt->second); + if (AST && isa<PointerType>(LI->getType())) + AST->deleteValue(LI); + BB->getInstList().erase(LI); + } + } + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (AllocaInst *AI = dyn_cast<AllocaInst>(SI->getOperand(1))) { + std::map<AllocaInst*, Value*>::iterator AIt = CurValues.find(AI); + if (AIt != CurValues.end()) { + // Store updates the "current value"... + AIt->second = SI->getOperand(0); + BB->getInstList().erase(SI); + } + } + } + } +} + + + +// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific +// Alloca returns true if there wasn't already a phi-node for that variable +// +bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, + unsigned &Version, + SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { + // Look up the basic-block in question. + PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; + + // If the BB already has a phi node added for the i'th alloca then we're done! + if (PN) return false; + + // Create a PhiNode using the dereferenced type... and add the phi-node to the + // BasicBlock. + PN = new PHINode(Allocas[AllocaNo]->getAllocatedType(), + Allocas[AllocaNo]->getName() + "." + + utostr(Version++), BB->begin()); + PhiToAllocaMap[PN] = AllocaNo; + + InsertedPHINodes.insert(PN); + + if (AST && isa<PointerType>(PN->getType())) + AST->copyValue(PointerAllocaValues[AllocaNo], PN); + + return true; +} + + +// RenamePass - Recursively traverse the CFG of the function, renaming loads and +// stores to the allocas which we are promoting. IncomingVals indicates what +// value each Alloca contains on exit from the predecessor block Pred. +// +void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, + std::vector<Value*> &IncomingVals) { + // If we are inserting any phi nodes into this BB, they will already be in the + // block. + if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { + // Pred may have multiple edges to BB. If so, we want to add N incoming + // values to each PHI we are inserting on the first time we see the edge. + // Check to see if APN already has incoming values from Pred. This also + // prevents us from modifying PHI nodes that are not currently being + // inserted. + bool HasPredEntries = false; + for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) { + if (APN->getIncomingBlock(i) == Pred) { + HasPredEntries = true; + break; + } + } + + // If we have PHI nodes to update, compute the number of edges from Pred to + // BB. + if (!HasPredEntries) { + TerminatorInst *PredTerm = Pred->getTerminator(); + unsigned NumEdges = 0; + for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { + if (PredTerm->getSuccessor(i) == BB) + ++NumEdges; + } + assert(NumEdges && "Must be at least one edge from Pred to BB!"); + + // Add entries for all the phis. + BasicBlock::iterator PNI = BB->begin(); + do { + unsigned AllocaNo = PhiToAllocaMap[APN]; + + // Add N incoming values to the PHI node. + for (unsigned i = 0; i != NumEdges; ++i) + APN->addIncoming(IncomingVals[AllocaNo], Pred); + + // The currently active variable for this block is now the PHI. + IncomingVals[AllocaNo] = APN; + + // Get the next phi node. + ++PNI; + APN = dyn_cast<PHINode>(PNI); + if (APN == 0) break; + + // Verify it doesn't already have entries for Pred. If it does, it is + // not being inserted by this mem2reg invocation. + HasPredEntries = false; + for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) { + if (APN->getIncomingBlock(i) == Pred) { + HasPredEntries = true; + break; + } + } + } while (!HasPredEntries); + } + } + + // Don't revisit blocks. + if (!Visited.insert(BB)) return; + + for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { + Instruction *I = II++; // get the instruction, increment iterator + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand())) { + std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); + if (AI != AllocaLookup.end()) { + Value *V = IncomingVals[AI->second]; + + // walk the use list of this load and replace all uses with r + LI->replaceAllUsesWith(V); + if (AST && isa<PointerType>(LI->getType())) + AST->deleteValue(LI); + BB->getInstList().erase(LI); + } + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Delete this instruction and mark the name as the current holder of the + // value + if (AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand())) { + std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); + if (ai != AllocaLookup.end()) { + // what value were we writing? + IncomingVals[ai->second] = SI->getOperand(0); + BB->getInstList().erase(SI); + } + } + } + } + + // Recurse to our successors. + TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0; i != TI->getNumSuccessors(); i++) + RenamePassWorkList.push_back(RenamePassData(TI->getSuccessor(i), BB, IncomingVals)); +} + +/// PromoteMemToReg - Promote the specified list of alloca instructions into +/// scalar registers, inserting PHI nodes as appropriate. This function makes +/// use of DominanceFrontier information. This function does not modify the CFG +/// of the function at all. All allocas must be from the same function. +/// +/// If AST is specified, the specified tracker is updated to reflect changes +/// made to the IR. +/// +void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, + DominatorTree &DT, DominanceFrontier &DF, + AliasSetTracker *AST) { + // If there is nothing to do, bail out... + if (Allocas.empty()) return; + + SmallVector<AllocaInst*, 16> RetryList; + PromoteMem2Reg(Allocas, RetryList, DT, DF, AST).run(); + + // PromoteMem2Reg may not have been able to promote all of the allocas in one + // pass, run it again if needed. + std::vector<AllocaInst*> NewAllocas; + while (!RetryList.empty()) { + // If we need to retry some allocas, this is due to there being no store + // before a read in a local block. To counteract this, insert a store of + // undef into the alloca right after the alloca itself. + for (unsigned i = 0, e = RetryList.size(); i != e; ++i) { + BasicBlock::iterator BBI = RetryList[i]; + + new StoreInst(UndefValue::get(RetryList[i]->getAllocatedType()), + RetryList[i], ++BBI); + } + + NewAllocas.assign(RetryList.begin(), RetryList.end()); + RetryList.clear(); + PromoteMem2Reg(NewAllocas, RetryList, DT, DF, AST).run(); + NewAllocas.clear(); + } +} diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp new file mode 100644 index 0000000000..6c34d02c15 --- /dev/null +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -0,0 +1,1905 @@ +//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Peephole optimize the CFG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "simplifycfg" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <algorithm> +#include <functional> +#include <set> +#include <map> +using namespace llvm; + +/// SafeToMergeTerminators - Return true if it is safe to merge these two +/// terminator instructions together. +/// +static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { + if (SI1 == SI2) return false; // Can't merge with self! + + // It is not safe to merge these two switch instructions if they have a common + // successor, and if that successor has a PHI node, and if *that* PHI node has + // conflicting incoming values from the two switch blocks. + BasicBlock *SI1BB = SI1->getParent(); + BasicBlock *SI2BB = SI2->getParent(); + SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) + if (SI1Succs.count(*I)) + for (BasicBlock::iterator BBI = (*I)->begin(); + isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + if (PN->getIncomingValueForBlock(SI1BB) != + PN->getIncomingValueForBlock(SI2BB)) + return false; + } + + return true; +} + +/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will +/// now be entries in it from the 'NewPred' block. The values that will be +/// flowing into the PHI nodes will be the same as those coming in from +/// ExistPred, an existing predecessor of Succ. +static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, + BasicBlock *ExistPred) { + assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) != + succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!"); + if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do + + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + Value *V = PN->getIncomingValueForBlock(ExistPred); + PN->addIncoming(V, NewPred); + } +} + +// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an +// almost-empty BB ending in an unconditional branch to Succ, into succ. +// +// Assumption: Succ is the single successor for BB. +// +static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { + assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); + + // Check to see if one of the predecessors of BB is already a predecessor of + // Succ. If so, we cannot do the transformation if there are any PHI nodes + // with incompatible values coming in from the two edges! + // + if (isa<PHINode>(Succ->front())) { + SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); + for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); + PI != PE; ++PI) + if (BBPreds.count(*PI)) { + // Loop over all of the PHI nodes checking to see if there are + // incompatible values coming in. + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + // Loop up the entries in the PHI node for BB and for *PI if the + // values coming in are non-equal, we cannot merge these two blocks + // (instead we should insert a conditional move or something, then + // merge the blocks). + if (PN->getIncomingValueForBlock(BB) != + PN->getIncomingValueForBlock(*PI)) + return false; // Values are not equal... + } + } + } + + // Finally, if BB has PHI nodes that are used by things other than the PHIs in + // Succ and Succ has predecessors that are not Succ and not Pred, we cannot + // fold these blocks, as we don't know whether BB dominates Succ or not to + // update the PHI nodes correctly. + if (!isa<PHINode>(BB->begin()) || Succ->getSinglePredecessor()) return true; + + // If the predecessors of Succ are only BB and Succ itself, handle it. + bool IsSafe = true; + for (pred_iterator PI = pred_begin(Succ), E = pred_end(Succ); PI != E; ++PI) + if (*PI != Succ && *PI != BB) { + IsSafe = false; + break; + } + if (IsSafe) return true; + + // If the PHI nodes in BB are only used by instructions in Succ, we are ok if + // BB and Succ have no common predecessors. + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; + ++UI) + if (cast<Instruction>(*UI)->getParent() != Succ) + return false; + } + + // Scan the predecessor sets of BB and Succ, making sure there are no common + // predecessors. Common predecessors would cause us to build a phi node with + // differing incoming values, which is not legal. + SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); + for (pred_iterator PI = pred_begin(Succ), E = pred_end(Succ); PI != E; ++PI) + if (BBPreds.count(*PI)) + return false; + + return true; +} + +/// TryToSimplifyUncondBranchFromEmptyBlock - BB contains an unconditional +/// branch to Succ, and contains no instructions other than PHI nodes and the +/// branch. If possible, eliminate BB. +static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, + BasicBlock *Succ) { + // If our successor has PHI nodes, then we need to update them to include + // entries for BB's predecessors, not for BB itself. Be careful though, + // if this transformation fails (returns true) then we cannot do this + // transformation! + // + if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; + + DOUT << "Killing Trivial BB: \n" << *BB; + + if (isa<PHINode>(Succ->begin())) { + // If there is more than one pred of succ, and there are PHI nodes in + // the successor, then we need to add incoming edges for the PHI nodes + // + const std::vector<BasicBlock*> BBPreds(pred_begin(BB), pred_end(BB)); + + // Loop over all of the PHI nodes in the successor of BB. + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + Value *OldVal = PN->removeIncomingValue(BB, false); + assert(OldVal && "No entry in PHI for Pred BB!"); + + // If this incoming value is one of the PHI nodes in BB, the new entries + // in the PHI node are the entries from the old PHI. + if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { + PHINode *OldValPN = cast<PHINode>(OldVal); + for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) + PN->addIncoming(OldValPN->getIncomingValue(i), + OldValPN->getIncomingBlock(i)); + } else { + for (std::vector<BasicBlock*>::const_iterator PredI = BBPreds.begin(), + End = BBPreds.end(); PredI != End; ++PredI) { + // Add an incoming value for each of the new incoming values... + PN->addIncoming(OldVal, *PredI); + } + } + } + } + + if (isa<PHINode>(&BB->front())) { + std::vector<BasicBlock*> + OldSuccPreds(pred_begin(Succ), pred_end(Succ)); + + // Move all PHI nodes in BB to Succ if they are alive, otherwise + // delete them. + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) + if (PN->use_empty()) { + // Just remove the dead phi. This happens if Succ's PHIs were the only + // users of the PHI nodes. + PN->eraseFromParent(); + } else { + // The instruction is alive, so this means that Succ must have + // *ONLY* had BB as a predecessor, and the PHI node is still valid + // now. Simply move it into Succ, because we know that BB + // strictly dominated Succ. + Succ->getInstList().splice(Succ->begin(), + BB->getInstList(), BB->begin()); + + // We need to add new entries for the PHI node to account for + // predecessors of Succ that the PHI node does not take into + // account. At this point, since we know that BB dominated succ, + // this means that we should any newly added incoming edges should + // use the PHI node as the value for these edges, because they are + // loop back edges. + for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i) + if (OldSuccPreds[i] != BB) + PN->addIncoming(PN, OldSuccPreds[i]); + } + } + + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; +} + +/// GetIfCondition - Given a basic block (BB) with two predecessors (and +/// presumably PHI nodes in it), check to see if the merge at this block is due +/// to an "if condition". If so, return the boolean condition that determines +/// which entry into BB will be taken. Also, return by references the block +/// that will be entered from if the condition is true, and the block that will +/// be entered if the condition is false. +/// +/// +static Value *GetIfCondition(BasicBlock *BB, + BasicBlock *&IfTrue, BasicBlock *&IfFalse) { + assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 && + "Function can only handle blocks with 2 predecessors!"); + BasicBlock *Pred1 = *pred_begin(BB); + BasicBlock *Pred2 = *++pred_begin(BB); + + // We can only handle branches. Other control flow will be lowered to + // branches if possible anyway. + if (!isa<BranchInst>(Pred1->getTerminator()) || + !isa<BranchInst>(Pred2->getTerminator())) + return 0; + BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator()); + + // Eliminate code duplication by ensuring that Pred1Br is conditional if + // either are. + if (Pred2Br->isConditional()) { + // If both branches are conditional, we don't have an "if statement". In + // reality, we could transform this case, but since the condition will be + // required anyway, we stand no chance of eliminating it, so the xform is + // probably not profitable. + if (Pred1Br->isConditional()) + return 0; + + std::swap(Pred1, Pred2); + std::swap(Pred1Br, Pred2Br); + } + + if (Pred1Br->isConditional()) { + // If we found a conditional branch predecessor, make sure that it branches + // to BB and Pred2Br. If it doesn't, this isn't an "if statement". + if (Pred1Br->getSuccessor(0) == BB && + Pred1Br->getSuccessor(1) == Pred2) { + IfTrue = Pred1; + IfFalse = Pred2; + } else if (Pred1Br->getSuccessor(0) == Pred2 && + Pred1Br->getSuccessor(1) == BB) { + IfTrue = Pred2; + IfFalse = Pred1; + } else { + // We know that one arm of the conditional goes to BB, so the other must + // go somewhere unrelated, and this must not be an "if statement". + return 0; + } + + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (++pred_begin(Pred2) != pred_end(Pred2)) + return 0; + + return Pred1Br->getCondition(); + } + + // Ok, if we got here, both predecessors end with an unconditional branch to + // BB. Don't panic! If both blocks only have a single (identical) + // predecessor, and THAT is a conditional branch, then we're all ok! + if (pred_begin(Pred1) == pred_end(Pred1) || + ++pred_begin(Pred1) != pred_end(Pred1) || + pred_begin(Pred2) == pred_end(Pred2) || + ++pred_begin(Pred2) != pred_end(Pred2) || + *pred_begin(Pred1) != *pred_begin(Pred2)) + return 0; + + // Otherwise, if this is a conditional branch, then we can use it! + BasicBlock *CommonPred = *pred_begin(Pred1); + if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) { + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; + } + return BI->getCondition(); + } + return 0; +} + + +// If we have a merge point of an "if condition" as accepted above, return true +// if the specified value dominates the block. We don't handle the true +// generality of domination here, just a special case which works well enough +// for us. +// +// If AggressiveInsts is non-null, and if V does not dominate BB, we check to +// see if V (which must be an instruction) is cheap to compute and is +// non-trapping. If both are true, the instruction is inserted into the set and +// true is returned. +static bool DominatesMergePoint(Value *V, BasicBlock *BB, + std::set<Instruction*> *AggressiveInsts) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) { + // Non-instructions all dominate instructions, but not all constantexprs + // can be executed unconditionally. + if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) + if (C->canTrap()) + return false; + return true; + } + BasicBlock *PBB = I->getParent(); + + // We don't want to allow weird loops that might have the "if condition" in + // the bottom of this block. + if (PBB == BB) return false; + + // If this instruction is defined in a block that contains an unconditional + // branch to BB, then it must be in the 'conditional' part of the "if + // statement". + if (BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator())) + if (BI->isUnconditional() && BI->getSuccessor(0) == BB) { + if (!AggressiveInsts) return false; + // Okay, it looks like the instruction IS in the "condition". Check to + // see if its a cheap instruction to unconditionally compute, and if it + // only uses stuff defined outside of the condition. If so, hoist it out. + switch (I->getOpcode()) { + default: return false; // Cannot hoist this out safely. + case Instruction::Load: + // We can hoist loads that are non-volatile and obviously cannot trap. + if (cast<LoadInst>(I)->isVolatile()) + return false; + if (!isa<AllocaInst>(I->getOperand(0)) && + !isa<Constant>(I->getOperand(0))) + return false; + + // Finally, we have to check to make sure there are no instructions + // before the load in its basic block, as we are going to hoist the loop + // out to its predecessor. + if (PBB->begin() != BasicBlock::iterator(I)) + return false; + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::ICmp: + case Instruction::FCmp: + break; // These are all cheap and non-trapping instructions. + } + + // Okay, we can only really hoist these out if their operands are not + // defined in the conditional region. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!DominatesMergePoint(I->getOperand(i), BB, 0)) + return false; + // Okay, it's safe to do this! Remember this instruction. + AggressiveInsts->insert(I); + } + + return true; +} + +// GatherConstantSetEQs - Given a potentially 'or'd together collection of +// icmp_eq instructions that compare a value against a constant, return the +// value being compared, and stick the constant into the Values vector. +static Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values){ + if (Instruction *Inst = dyn_cast<Instruction>(V)) + if (Inst->getOpcode() == Instruction::ICmp && + cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) { + if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(1))) { + Values.push_back(C); + return Inst->getOperand(0); + } else if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(0))) { + Values.push_back(C); + return Inst->getOperand(1); + } + } else if (Inst->getOpcode() == Instruction::Or) { + if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values)) + if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values)) + if (LHS == RHS) + return LHS; + } + return 0; +} + +// GatherConstantSetNEs - Given a potentially 'and'd together collection of +// setne instructions that compare a value against a constant, return the value +// being compared, and stick the constant into the Values vector. +static Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values){ + if (Instruction *Inst = dyn_cast<Instruction>(V)) + if (Inst->getOpcode() == Instruction::ICmp && + cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) { + if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(1))) { + Values.push_back(C); + return Inst->getOperand(0); + } else if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(0))) { + Values.push_back(C); + return Inst->getOperand(1); + } + } else if (Inst->getOpcode() == Instruction::And) { + if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values)) + if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values)) + if (LHS == RHS) + return LHS; + } + return 0; +} + + + +/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a +/// bunch of comparisons of one value against constants, return the value and +/// the constants being compared. +static bool GatherValueComparisons(Instruction *Cond, Value *&CompVal, + std::vector<ConstantInt*> &Values) { + if (Cond->getOpcode() == Instruction::Or) { + CompVal = GatherConstantSetEQs(Cond, Values); + + // Return true to indicate that the condition is true if the CompVal is + // equal to one of the constants. + return true; + } else if (Cond->getOpcode() == Instruction::And) { + CompVal = GatherConstantSetNEs(Cond, Values); + + // Return false to indicate that the condition is false if the CompVal is + // equal to one of the constants. + return false; + } + return false; +} + +/// ErasePossiblyDeadInstructionTree - If the specified instruction is dead and +/// has no side effects, nuke it. If it uses any instructions that become dead +/// because the instruction is now gone, nuke them too. +static void ErasePossiblyDeadInstructionTree(Instruction *I) { + if (!isInstructionTriviallyDead(I)) return; + + std::vector<Instruction*> InstrsToInspect; + InstrsToInspect.push_back(I); + + while (!InstrsToInspect.empty()) { + I = InstrsToInspect.back(); + InstrsToInspect.pop_back(); + + if (!isInstructionTriviallyDead(I)) continue; + + // If I is in the work list multiple times, remove previous instances. + for (unsigned i = 0, e = InstrsToInspect.size(); i != e; ++i) + if (InstrsToInspect[i] == I) { + InstrsToInspect.erase(InstrsToInspect.begin()+i); + --i, --e; + } + + // Add operands of dead instruction to worklist. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(i))) + InstrsToInspect.push_back(OpI); + + // Remove dead instruction. + I->eraseFromParent(); + } +} + +// isValueEqualityComparison - Return true if the specified terminator checks to +// see if a value is equal to constant integer value. +static Value *isValueEqualityComparison(TerminatorInst *TI) { + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + // Do not permit merging of large switch instructions into their + // predecessors unless there is only one predecessor. + if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()), + pred_end(SI->getParent())) > 128) + return 0; + + return SI->getCondition(); + } + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) + if (BI->isConditional() && BI->getCondition()->hasOneUse()) + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) + if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || + ICI->getPredicate() == ICmpInst::ICMP_NE) && + isa<ConstantInt>(ICI->getOperand(1))) + return ICI->getOperand(0); + return 0; +} + +// Given a value comparison instruction, decode all of the 'cases' that it +// represents and return the 'default' block. +static BasicBlock * +GetValueEqualityComparisonCases(TerminatorInst *TI, + std::vector<std::pair<ConstantInt*, + BasicBlock*> > &Cases) { + if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + Cases.reserve(SI->getNumCases()); + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i))); + return SI->getDefaultDest(); + } + + BranchInst *BI = cast<BranchInst>(TI); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + Cases.push_back(std::make_pair(cast<ConstantInt>(ICI->getOperand(1)), + BI->getSuccessor(ICI->getPredicate() == + ICmpInst::ICMP_NE))); + return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); +} + + +// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries +// in the list that match the specified block. +static void EliminateBlockCases(BasicBlock *BB, + std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases) { + for (unsigned i = 0, e = Cases.size(); i != e; ++i) + if (Cases[i].second == BB) { + Cases.erase(Cases.begin()+i); + --i; --e; + } +} + +// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as +// well. +static bool +ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1, + std::vector<std::pair<ConstantInt*, BasicBlock*> > &C2) { + std::vector<std::pair<ConstantInt*, BasicBlock*> > *V1 = &C1, *V2 = &C2; + + // Make V1 be smaller than V2. + if (V1->size() > V2->size()) + std::swap(V1, V2); + + if (V1->size() == 0) return false; + if (V1->size() == 1) { + // Just scan V2. + ConstantInt *TheVal = (*V1)[0].first; + for (unsigned i = 0, e = V2->size(); i != e; ++i) + if (TheVal == (*V2)[i].first) + return true; + } + + // Otherwise, just sort both lists and compare element by element. + std::sort(V1->begin(), V1->end()); + std::sort(V2->begin(), V2->end()); + unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); + while (i1 != e1 && i2 != e2) { + if ((*V1)[i1].first == (*V2)[i2].first) + return true; + if ((*V1)[i1].first < (*V2)[i2].first) + ++i1; + else + ++i2; + } + return false; +} + +// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a +// terminator instruction and its block is known to only have a single +// predecessor block, check to see if that predecessor is also a value +// comparison with the same value, and if that comparison determines the outcome +// of this comparison. If so, simplify TI. This does a very limited form of +// jump threading. +static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred) { + Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); + if (!PredVal) return false; // Not a value comparison in predecessor. + + Value *ThisVal = isValueEqualityComparison(TI); + assert(ThisVal && "This isn't a value comparison!!"); + if (ThisVal != PredVal) return false; // Different predicates. + + // Find out information about when control will move from Pred to TI's block. + std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases; + BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), + PredCases); + EliminateBlockCases(PredDef, PredCases); // Remove default from cases. + + // Find information about how control leaves this block. + std::vector<std::pair<ConstantInt*, BasicBlock*> > ThisCases; + BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); + EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. + + // If TI's block is the default block from Pred's comparison, potentially + // simplify TI based on this knowledge. + if (PredDef == TI->getParent()) { + // If we are here, we know that the value is none of those cases listed in + // PredCases. If there are any cases in ThisCases that are in PredCases, we + // can simplify TI. + if (ValuesOverlap(PredCases, ThisCases)) { + if (BranchInst *BTI = dyn_cast<BranchInst>(TI)) { + // Okay, one of the successors of this condbr is dead. Convert it to a + // uncond br. + assert(ThisCases.size() == 1 && "Branch can only have one case!"); + Value *Cond = BTI->getCondition(); + // Insert the new branch. + Instruction *NI = new BranchInst(ThisDef, TI); + + // Remove PHI node entries for the dead edge. + ThisCases[0].second->removePredecessor(TI->getParent()); + + DOUT << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; + + TI->eraseFromParent(); // Nuke the old one. + // If condition is now dead, nuke it. + if (Instruction *CondI = dyn_cast<Instruction>(Cond)) + ErasePossiblyDeadInstructionTree(CondI); + return true; + + } else { + SwitchInst *SI = cast<SwitchInst>(TI); + // Okay, TI has cases that are statically dead, prune them away. + SmallPtrSet<Constant*, 16> DeadCases; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + DeadCases.insert(PredCases[i].first); + + DOUT << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI; + + for (unsigned i = SI->getNumCases()-1; i != 0; --i) + if (DeadCases.count(SI->getCaseValue(i))) { + SI->getSuccessor(i)->removePredecessor(TI->getParent()); + SI->removeCase(i); + } + + DOUT << "Leaving: " << *TI << "\n"; + return true; + } + } + + } else { + // Otherwise, TI's block must correspond to some matched value. Find out + // which value (or set of values) this is. + ConstantInt *TIV = 0; + BasicBlock *TIBB = TI->getParent(); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].second == TIBB) + if (TIV == 0) + TIV = PredCases[i].first; + else + return false; // Cannot handle multiple values coming to this block. + assert(TIV && "No edge from pred to succ?"); + + // Okay, we found the one constant that our value can be if we get into TI's + // BB. Find out which successor will unconditionally be branched to. + BasicBlock *TheRealDest = 0; + for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) + if (ThisCases[i].first == TIV) { + TheRealDest = ThisCases[i].second; + break; + } + + // If not handled by any explicit cases, it is handled by the default case. + if (TheRealDest == 0) TheRealDest = ThisDef; + + // Remove PHI node entries for dead edges. + BasicBlock *CheckEdge = TheRealDest; + for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI) + if (*SI != CheckEdge) + (*SI)->removePredecessor(TIBB); + else + CheckEdge = 0; + + // Insert the new branch. + Instruction *NI = new BranchInst(TheRealDest, TI); + + DOUT << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; + Instruction *Cond = 0; + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) + Cond = dyn_cast<Instruction>(BI->getCondition()); + TI->eraseFromParent(); // Nuke the old one. + + if (Cond) ErasePossiblyDeadInstructionTree(Cond); + return true; + } + return false; +} + +// FoldValueComparisonIntoPredecessors - The specified terminator is a value +// equality comparison instruction (either a switch or a branch on "X == c"). +// See if any of the predecessors of the terminator block are value comparisons +// on the same value. If so, and if safe to do so, fold them together. +static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { + BasicBlock *BB = TI->getParent(); + Value *CV = isValueEqualityComparison(TI); // CondVal + assert(CV && "Not a comparison?"); + bool Changed = false; + + std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.back(); + Preds.pop_back(); + + // See if the predecessor is a comparison with the same value. + TerminatorInst *PTI = Pred->getTerminator(); + Value *PCV = isValueEqualityComparison(PTI); // PredCondVal + + if (PCV == CV && SafeToMergeTerminators(TI, PTI)) { + // Figure out which 'cases' to copy from SI to PSI. + std::vector<std::pair<ConstantInt*, BasicBlock*> > BBCases; + BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); + + std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases; + BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); + + // Based on whether the default edge from PTI goes to BB or not, fill in + // PredCases and PredDefault with the new switch cases we would like to + // build. + std::vector<BasicBlock*> NewSuccessors; + + if (PredDefault == BB) { + // If this is the default destination from PTI, only the edges in TI + // that don't occur in PTI, or that branch to BB will be activated. + std::set<ConstantInt*> PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].second != BB) + PTIHandled.insert(PredCases[i].first); + else { + // The default destination is BB, we don't need explicit targets. + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; --e; + } + + // Reconstruct the new switch statement we will be building. + if (PredDefault != BBDefault) { + PredDefault->removePredecessor(Pred); + PredDefault = BBDefault; + NewSuccessors.push_back(BBDefault); + } + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (!PTIHandled.count(BBCases[i].first) && + BBCases[i].second != BBDefault) { + PredCases.push_back(BBCases[i]); + NewSuccessors.push_back(BBCases[i].second); + } + + } else { + // If this is not the default destination from PSI, only the edges + // in SI that occur in PSI with a destination of BB will be + // activated. + std::set<ConstantInt*> PTIHandled; + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + if (PredCases[i].second == BB) { + PTIHandled.insert(PredCases[i].first); + std::swap(PredCases[i], PredCases.back()); + PredCases.pop_back(); + --i; --e; + } + + // Okay, now we know which constants were sent to BB from the + // predecessor. Figure out where they will all go now. + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) + if (PTIHandled.count(BBCases[i].first)) { + // If this is one we are capable of getting... + PredCases.push_back(BBCases[i]); + NewSuccessors.push_back(BBCases[i].second); + PTIHandled.erase(BBCases[i].first);// This constant is taken care of + } + + // If there are any constants vectored to BB that TI doesn't handle, + // they must go to the default destination of TI. + for (std::set<ConstantInt*>::iterator I = PTIHandled.begin(), + E = PTIHandled.end(); I != E; ++I) { + PredCases.push_back(std::make_pair(*I, BBDefault)); + NewSuccessors.push_back(BBDefault); + } + } + + // Okay, at this point, we know which new successor Pred will get. Make + // sure we update the number of entries in the PHI nodes for these + // successors. + for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) + AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + + // Now that the successors are updated, create the new Switch instruction. + SwitchInst *NewSI = new SwitchInst(CV, PredDefault, PredCases.size(),PTI); + for (unsigned i = 0, e = PredCases.size(); i != e; ++i) + NewSI->addCase(PredCases[i].first, PredCases[i].second); + + Instruction *DeadCond = 0; + if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) + // If PTI is a branch, remember the condition. + DeadCond = dyn_cast<Instruction>(BI->getCondition()); + Pred->getInstList().erase(PTI); + + // If the condition is dead now, remove the instruction tree. + if (DeadCond) ErasePossiblyDeadInstructionTree(DeadCond); + + // Okay, last check. If BB is still a successor of PSI, then we must + // have an infinite loop case. If so, add an infinitely looping block + // to handle the case to preserve the behavior of the code. + BasicBlock *InfLoopBlock = 0; + for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) + if (NewSI->getSuccessor(i) == BB) { + if (InfLoopBlock == 0) { + // Insert it at the end of the loop, because it's either code, + // or it won't matter if it's hot. :) + InfLoopBlock = new BasicBlock("infloop", BB->getParent()); + new BranchInst(InfLoopBlock, InfLoopBlock); + } + NewSI->setSuccessor(i, InfLoopBlock); + } + + Changed = true; + } + } + return Changed; +} + +/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and +/// BB2, hoist any common code in the two blocks up into the branch block. The +/// caller of this function guarantees that BI's block dominates BB1 and BB2. +static bool HoistThenElseCodeToIf(BranchInst *BI) { + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. In particular, we don't want to get into + // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As + // such, we currently just scan for obviously identical instructions in an + // identical order. + BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. + BasicBlock *BB2 = BI->getSuccessor(1); // The false destination + + Instruction *I1 = BB1->begin(), *I2 = BB2->begin(); + if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || + isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2)) + return false; + + // If we get here, we can hoist at least one instruction. + BasicBlock *BIParent = BI->getParent(); + + do { + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa<TerminatorInst>(I1)) + goto HoistTerminator; + + // For a normal instruction, we just move one to right before the branch, + // then replace all uses of the other with the first. Finally, we remove + // the now redundant second instruction. + BIParent->getInstList().splice(BI, BB1->getInstList(), I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + BB2->getInstList().erase(I2); + + I1 = BB1->begin(); + I2 = BB2->begin(); + } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2)); + + return true; + +HoistTerminator: + // Okay, it is safe to hoist the terminator. + Instruction *NT = I1->clone(); + BIParent->getInstList().insert(BI, NT); + if (NT->getType() != Type::VoidTy) { + I1->replaceAllUsesWith(NT); + I2->replaceAllUsesWith(NT); + NT->takeName(I1); + } + + // Hoisting one of the terminators from our successor is a great thing. + // Unfortunately, the successors of the if/else blocks may have PHI nodes in + // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI + // nodes, so we insert select instruction to compute the final result. + std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects; + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V != BB2V) { + // These values do not agree. Insert a select instruction before NT + // that determines the right value. + SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; + if (SI == 0) + SI = new SelectInst(BI->getCondition(), BB1V, BB2V, + BB1V->getName()+"."+BB2V->getName(), NT); + // Make the PHI node use the select for all incoming values for BB1/BB2 + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) + PN->setIncomingValue(i, SI); + } + } + } + + // Update any PHI nodes in our new successors. + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) + AddPredecessorToBlock(*SI, BIParent, BB1); + + BI->eraseFromParent(); + return true; +} + +/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch +/// across this block. +static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { + BranchInst *BI = cast<BranchInst>(BB->getTerminator()); + unsigned Size = 0; + + // If this basic block contains anything other than a PHI (which controls the + // branch) and branch itself, bail out. FIXME: improve this in the future. + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI, ++Size) { + if (Size > 10) return false; // Don't clone large BB's. + + // We can only support instructions that are do not define values that are + // live outside of the current basic block. + for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); + UI != E; ++UI) { + Instruction *U = cast<Instruction>(*UI); + if (U->getParent() != BB || isa<PHINode>(U)) return false; + } + + // Looks ok, continue checking. + } + + return true; +} + +/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value +/// that is defined in the same block as the branch and if any PHI entries are +/// constants, thread edges corresponding to that entry to be branches to their +/// ultimate destination. +static bool FoldCondBranchOnPHI(BranchInst *BI) { + BasicBlock *BB = BI->getParent(); + PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); + // NOTE: we currently cannot transform this case if the PHI node is used + // outside of the block. + if (!PN || PN->getParent() != BB || !PN->hasOneUse()) + return false; + + // Degenerate case of a single entry PHI. + if (PN->getNumIncomingValues() == 1) { + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->eraseFromParent(); + return true; + } + + // Now we know that this block has multiple preds and two succs. + if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; + + // Okay, this is a simple enough basic block. See if any phi values are + // constants. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantInt *CB; + if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && + CB->getType() == Type::Int1Ty) { + // Okay, we now know that all edges from PredBB should be revectored to + // branch to RealDest. + BasicBlock *PredBB = PN->getIncomingBlock(i); + BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); + + if (RealDest == BB) continue; // Skip self loops. + + // The dest block might have PHI nodes, other predecessors and other + // difficult cases. Instead of being smart about this, just insert a new + // block that jumps to the destination block, effectively splitting + // the edge we are about to create. + BasicBlock *EdgeBB = new BasicBlock(RealDest->getName()+".critedge", + RealDest->getParent(), RealDest); + new BranchInst(RealDest, EdgeBB); + PHINode *PN; + for (BasicBlock::iterator BBI = RealDest->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *V = PN->getIncomingValueForBlock(BB); + PN->addIncoming(V, EdgeBB); + } + + // BB may have instructions that are being threaded over. Clone these + // instructions into EdgeBB. We know that there will be no uses of the + // cloned instructions outside of EdgeBB. + BasicBlock::iterator InsertPt = EdgeBB->begin(); + std::map<Value*, Value*> TranslateMap; // Track translated values. + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); + } else { + // Clone the instruction. + Instruction *N = BBI->clone(); + if (BBI->hasName()) N->setName(BBI->getName()+".c"); + + // Update operands due to translation. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + std::map<Value*, Value*>::iterator PI = + TranslateMap.find(N->getOperand(i)); + if (PI != TranslateMap.end()) + N->setOperand(i, PI->second); + } + + // Check for trivial simplification. + if (Constant *C = ConstantFoldInstruction(N)) { + TranslateMap[BBI] = C; + delete N; // Constant folded away, don't need actual inst + } else { + // Insert the new instruction into its new home. + EdgeBB->getInstList().insert(InsertPt, N); + if (!BBI->use_empty()) + TranslateMap[BBI] = N; + } + } + } + + // Loop over all of the edges from PredBB to BB, changing them to branch + // to EdgeBB instead. + TerminatorInst *PredBBTI = PredBB->getTerminator(); + for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) + if (PredBBTI->getSuccessor(i) == BB) { + BB->removePredecessor(PredBB); + PredBBTI->setSuccessor(i, EdgeBB); + } + + // Recurse, simplifying any other constants. + return FoldCondBranchOnPHI(BI) | true; + } + } + + return false; +} + +/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry +/// PHI node, see if we can eliminate it. +static bool FoldTwoEntryPHINode(PHINode *PN) { + // Ok, this is a two entry PHI node. Check to see if this is a simple "if + // statement", which has a very simple dominance structure. Basically, we + // are trying to find the condition that is being branched on, which + // subsequently causes this merge to happen. We really want control + // dependence information for this check, but simplifycfg can't keep it up + // to date, and this catches most of the cases we care about anyway. + // + BasicBlock *BB = PN->getParent(); + BasicBlock *IfTrue, *IfFalse; + Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse); + if (!IfCond) return false; + + // Okay, we found that we can merge this two-entry phi node into a select. + // Doing so would require us to fold *all* two entry phi nodes in this block. + // At some point this becomes non-profitable (particularly if the target + // doesn't support cmov's). Only do this transformation if there are two or + // fewer PHI nodes in this block. + unsigned NumPhis = 0; + for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) + if (NumPhis > 2) + return false; + + DOUT << "FOUND IF CONDITION! " << *IfCond << " T: " + << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"; + + // Loop over the PHI's seeing if we can promote them all to select + // instructions. While we are at it, keep track of the instructions + // that need to be moved to the dominating block. + std::set<Instruction*> AggressiveInsts; + + BasicBlock::iterator AfterPHIIt = BB->begin(); + while (isa<PHINode>(AfterPHIIt)) { + PHINode *PN = cast<PHINode>(AfterPHIIt++); + if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) { + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB, + &AggressiveInsts) || + !DominatesMergePoint(PN->getIncomingValue(1), BB, + &AggressiveInsts)) { + return false; + } + } + + // If we all PHI nodes are promotable, check to make sure that all + // instructions in the predecessor blocks can be promoted as well. If + // not, we won't be able to get rid of the control flow, so it's not + // worth promoting to select instructions. + BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0; + PN = cast<PHINode>(BB->begin()); + BasicBlock *Pred = PN->getIncomingBlock(0); + if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) { + IfBlock1 = Pred; + DomBlock = *pred_begin(Pred); + for (BasicBlock::iterator I = Pred->begin(); + !isa<TerminatorInst>(I); ++I) + if (!AggressiveInsts.count(I)) { + // This is not an aggressive instruction that we can promote. + // Because of this, we won't be able to get rid of the control + // flow, so the xform is not worth it. + return false; + } + } + + Pred = PN->getIncomingBlock(1); + if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) { + IfBlock2 = Pred; + DomBlock = *pred_begin(Pred); + for (BasicBlock::iterator I = Pred->begin(); + !isa<TerminatorInst>(I); ++I) + if (!AggressiveInsts.count(I)) { + // This is not an aggressive instruction that we can promote. + // Because of this, we won't be able to get rid of the control + // flow, so the xform is not worth it. + return false; + } + } + + // If we can still promote the PHI nodes after this gauntlet of tests, + // do all of the PHI's now. + + // Move all 'aggressive' instructions, which are defined in the + // conditional parts of the if's up to the dominating block. + if (IfBlock1) { + DomBlock->getInstList().splice(DomBlock->getTerminator(), + IfBlock1->getInstList(), + IfBlock1->begin(), + IfBlock1->getTerminator()); + } + if (IfBlock2) { + DomBlock->getInstList().splice(DomBlock->getTerminator(), + IfBlock2->getInstList(), + IfBlock2->begin(), + IfBlock2->getTerminator()); + } + + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + // Change the PHI node into a select instruction. + Value *TrueVal = + PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); + Value *FalseVal = + PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); + + Value *NV = new SelectInst(IfCond, TrueVal, FalseVal, "", AfterPHIIt); + PN->replaceAllUsesWith(NV); + NV->takeName(PN); + + BB->getInstList().erase(PN); + } + return true; +} + +namespace { + /// ConstantIntOrdering - This class implements a stable ordering of constant + /// integers that does not depend on their address. This is important for + /// applications that sort ConstantInt's to ensure uniqueness. + struct ConstantIntOrdering { + bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const { + return LHS->getValue().ult(RHS->getValue()); + } + }; +} + +// SimplifyCFG - This function is used to do simplification of a CFG. For +// example, it adjusts branches to branches to eliminate the extra hop, it +// eliminates unreachable basic blocks, and does other "peephole" optimization +// of the CFG. It returns true if a modification was made. +// +// WARNING: The entry node of a function may not be simplified. +// +bool llvm::SimplifyCFG(BasicBlock *BB) { + bool Changed = false; + Function *M = BB->getParent(); + + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + assert(&BB->getParent()->getEntryBlock() != BB && + "Can't Simplify entry block!"); + + // Remove basic blocks that have no predecessors... which are unreachable. + if (pred_begin(BB) == pred_end(BB) || + *pred_begin(BB) == BB && ++pred_begin(BB) == pred_end(BB)) { + DOUT << "Removing BB: \n" << *BB; + + // Loop through all of our successors and make sure they know that one + // of their predecessors is going away. + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) + SI->removePredecessor(BB); + + while (!BB->empty()) { + Instruction &I = BB->back(); + // If this instruction is used, replace uses with an arbitrary + // value. Because control flow can't get here, we don't care + // what we replace the value with. Note that since this block is + // unreachable, and all values contained within it must dominate their + // uses, that all uses will eventually be removed. + if (!I.use_empty()) + // Make all users of this instruction use undef instead + I.replaceAllUsesWith(UndefValue::get(I.getType())); + + // Remove the instruction from the basic block + BB->getInstList().pop_back(); + } + M->getBasicBlockList().erase(BB); + return true; + } + + // Check to see if we can constant propagate this terminator instruction + // away... + Changed |= ConstantFoldTerminator(BB); + + // If this is a returning block with only PHI nodes in it, fold the return + // instruction into any unconditional branch predecessors. + // + // If any predecessor is a conditional branch that just selects among + // different return values, fold the replace the branch/return with a select + // and return. + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + BasicBlock::iterator BBI = BB->getTerminator(); + if (BBI == BB->begin() || isa<PHINode>(--BBI)) { + // Find predecessors that end with branches. + std::vector<BasicBlock*> UncondBranchPreds; + std::vector<BranchInst*> CondBranchPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + TerminatorInst *PTI = (*PI)->getTerminator(); + if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) + if (BI->isUnconditional()) + UncondBranchPreds.push_back(*PI); + else + CondBranchPreds.push_back(BI); + } + + // If we found some, do the transformation! + if (!UncondBranchPreds.empty()) { + while (!UncondBranchPreds.empty()) { + BasicBlock *Pred = UncondBranchPreds.back(); + DOUT << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred; + UncondBranchPreds.pop_back(); + Instruction *UncondBranch = Pred->getTerminator(); + // Clone the return and add it to the end of the predecessor. + Instruction *NewRet = RI->clone(); + Pred->getInstList().push_back(NewRet); + + // If the return instruction returns a value, and if the value was a + // PHI node in "BB", propagate the right value into the return. + if (NewRet->getNumOperands() == 1) + if (PHINode *PN = dyn_cast<PHINode>(NewRet->getOperand(0))) + if (PN->getParent() == BB) + NewRet->setOperand(0, PN->getIncomingValueForBlock(Pred)); + // Update any PHI nodes in the returning block to realize that we no + // longer branch to them. + BB->removePredecessor(Pred); + Pred->getInstList().erase(UncondBranch); + } + + // If we eliminated all predecessors of the block, delete the block now. + if (pred_begin(BB) == pred_end(BB)) + // We know there are no successors, so just nuke the block. + M->getBasicBlockList().erase(BB); + + return true; + } + + // Check out all of the conditional branches going to this return + // instruction. If any of them just select between returns, change the + // branch itself into a select/return pair. + while (!CondBranchPreds.empty()) { + BranchInst *BI = CondBranchPreds.back(); + CondBranchPreds.pop_back(); + BasicBlock *TrueSucc = BI->getSuccessor(0); + BasicBlock *FalseSucc = BI->getSuccessor(1); + BasicBlock *OtherSucc = TrueSucc == BB ? FalseSucc : TrueSucc; + + // Check to see if the non-BB successor is also a return block. + if (isa<ReturnInst>(OtherSucc->getTerminator())) { + // Check to see if there are only PHI instructions in this block. + BasicBlock::iterator OSI = OtherSucc->getTerminator(); + if (OSI == OtherSucc->begin() || isa<PHINode>(--OSI)) { + // Okay, we found a branch that is going to two return nodes. If + // there is no return value for this function, just change the + // branch into a return. + if (RI->getNumOperands() == 0) { + TrueSucc->removePredecessor(BI->getParent()); + FalseSucc->removePredecessor(BI->getParent()); + new ReturnInst(0, BI); + BI->getParent()->getInstList().erase(BI); + return true; + } + + // Otherwise, figure out what the true and false return values are + // so we can insert a new select instruction. + Value *TrueValue = TrueSucc->getTerminator()->getOperand(0); + Value *FalseValue = FalseSucc->getTerminator()->getOperand(0); + + // Unwrap any PHI nodes in the return blocks. + if (PHINode *TVPN = dyn_cast<PHINode>(TrueValue)) + if (TVPN->getParent() == TrueSucc) + TrueValue = TVPN->getIncomingValueForBlock(BI->getParent()); + if (PHINode *FVPN = dyn_cast<PHINode>(FalseValue)) + if (FVPN->getParent() == FalseSucc) + FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); + + // In order for this transformation to be safe, we must be able to + // unconditionally execute both operands to the return. This is + // normally the case, but we could have a potentially-trapping + // constant expression that prevents this transformation from being + // safe. + if ((!isa<ConstantExpr>(TrueValue) || + !cast<ConstantExpr>(TrueValue)->canTrap()) && + (!isa<ConstantExpr>(TrueValue) || + !cast<ConstantExpr>(TrueValue)->canTrap())) { + TrueSucc->removePredecessor(BI->getParent()); + FalseSucc->removePredecessor(BI->getParent()); + + // Insert a new select instruction. + Value *NewRetVal; + Value *BrCond = BI->getCondition(); + if (TrueValue != FalseValue) + NewRetVal = new SelectInst(BrCond, TrueValue, + FalseValue, "retval", BI); + else + NewRetVal = TrueValue; + + DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" + << "\n " << *BI << "Select = " << *NewRetVal + << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc; + + new ReturnInst(NewRetVal, BI); + BI->eraseFromParent(); + if (Instruction *BrCondI = dyn_cast<Instruction>(BrCond)) + if (isInstructionTriviallyDead(BrCondI)) + BrCondI->eraseFromParent(); + return true; + } + } + } + } + } + } else if (isa<UnwindInst>(BB->begin())) { + // Check to see if the first instruction in this block is just an unwind. + // If so, replace any invoke instructions which use this as an exception + // destination with call instructions, and any unconditional branch + // predecessor with an unwind. + // + std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB)); + while (!Preds.empty()) { + BasicBlock *Pred = Preds.back(); + if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) { + if (BI->isUnconditional()) { + Pred->getInstList().pop_back(); // nuke uncond branch + new UnwindInst(Pred); // Use unwind. + Changed = true; + } + } else if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator())) + if (II->getUnwindDest() == BB) { + // Insert a new branch instruction before the invoke, because this + // is now a fall through... + BranchInst *BI = new BranchInst(II->getNormalDest(), II); + Pred->getInstList().remove(II); // Take out of symbol table + + // Insert the call now... + SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end()); + CallInst *CI = new CallInst(II->getCalledValue(), + &Args[0], Args.size(), II->getName(), BI); + CI->setCallingConv(II->getCallingConv()); + // If the invoke produced a value, the Call now does instead + II->replaceAllUsesWith(CI); + delete II; + Changed = true; + } + + Preds.pop_back(); + } + + // If this block is now dead, remove it. + if (pred_begin(BB) == pred_end(BB)) { + // We know there are no successors, so just nuke the block. + M->getBasicBlockList().erase(BB); + return true; + } + + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred)) + return SimplifyCFG(BB) || 1; + + // If the block only contains the switch, see if we can fold the block + // away into any preds. + if (SI == &BB->front()) + if (FoldValueComparisonIntoPredecessors(SI)) + return SimplifyCFG(BB) || 1; + } + } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (BI->isUnconditional()) { + BasicBlock::iterator BBI = BB->begin(); // Skip over phi nodes... + while (isa<PHINode>(*BBI)) ++BBI; + + BasicBlock *Succ = BI->getSuccessor(0); + if (BBI->isTerminator() && // Terminator is the only non-phi instruction! + Succ != BB) // Don't hurt infinite loops! + if (TryToSimplifyUncondBranchFromEmptyBlock(BB, Succ)) + return 1; + + } else { // Conditional branch + if (isValueEqualityComparison(BI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this + // switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred)) + return SimplifyCFG(BB) || 1; + + // This block must be empty, except for the setcond inst, if it exists. + BasicBlock::iterator I = BB->begin(); + if (&*I == BI || + (&*I == cast<Instruction>(BI->getCondition()) && + &*++I == BI)) + if (FoldValueComparisonIntoPredecessors(BI)) + return SimplifyCFG(BB) | true; + } + + // If this is a branch on a phi node in the current block, thread control + // through this block if any PHI node entries are constants. + if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) + if (PN->getParent() == BI->getParent()) + if (FoldCondBranchOnPHI(BI)) + return SimplifyCFG(BB) | true; + + // If this basic block is ONLY a setcc and a branch, and if a predecessor + // branches to us and one of our successors, fold the setcc into the + // predecessor and use logical operations to pick the right destination. + BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *FalseDest = BI->getSuccessor(1); + if (Instruction *Cond = dyn_cast<Instruction>(BI->getCondition())) { + BasicBlock::iterator CondIt = Cond; + if ((isa<CmpInst>(Cond) || isa<BinaryOperator>(Cond)) && + Cond->getParent() == BB && &BB->front() == Cond && + &*++CondIt == BI && Cond->hasOneUse() && + TrueDest != BB && FalseDest != BB) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI!=E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI->isConditional() && SafeToMergeTerminators(BI, PBI)) { + BasicBlock *PredBlock = *PI; + if (PBI->getSuccessor(0) == FalseDest || + PBI->getSuccessor(1) == TrueDest) { + // Invert the predecessors condition test (xor it with true), + // which allows us to write this code once. + Value *NewCond = + BinaryOperator::createNot(PBI->getCondition(), + PBI->getCondition()->getName()+".not", PBI); + PBI->setCondition(NewCond); + BasicBlock *OldTrue = PBI->getSuccessor(0); + BasicBlock *OldFalse = PBI->getSuccessor(1); + PBI->setSuccessor(0, OldFalse); + PBI->setSuccessor(1, OldTrue); + } + + if ((PBI->getSuccessor(0) == TrueDest && FalseDest != BB) || + (PBI->getSuccessor(1) == FalseDest && TrueDest != BB)) { + // Clone Cond into the predecessor basic block, and or/and the + // two conditions together. + Instruction *New = Cond->clone(); + PredBlock->getInstList().insert(PBI, New); + New->takeName(Cond); + Cond->setName(New->getName()+".old"); + Instruction::BinaryOps Opcode = + PBI->getSuccessor(0) == TrueDest ? + Instruction::Or : Instruction::And; + Value *NewCond = + BinaryOperator::create(Opcode, PBI->getCondition(), + New, "bothcond", PBI); + PBI->setCondition(NewCond); + if (PBI->getSuccessor(0) == BB) { + AddPredecessorToBlock(TrueDest, PredBlock, BB); + PBI->setSuccessor(0, TrueDest); + } + if (PBI->getSuccessor(1) == BB) { + AddPredecessorToBlock(FalseDest, PredBlock, BB); + PBI->setSuccessor(1, FalseDest); + } + return SimplifyCFG(BB) | 1; + } + } + } + + // Scan predessor blocks for conditional branches. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI != BI && PBI->isConditional()) { + + // If this block ends with a branch instruction, and if there is a + // predecessor that ends on a branch of the same condition, make + // this conditional branch redundant. + if (PBI->getCondition() == BI->getCondition() && + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + // Okay, the outcome of this conditional branch is statically + // knowable. If this block had a single pred, handle specially. + if (BB->getSinglePredecessor()) { + // Turn this into a branch on constant. + bool CondIsTrue = PBI->getSuccessor(0) == BB; + BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue)); + return SimplifyCFG(BB); // Nuke the branch on constant. + } + + // Otherwise, if there are multiple predecessors, insert a PHI + // that merges in the constant and simplify the block result. + if (BlockIsSimpleEnoughToThreadThrough(BB)) { + PHINode *NewPN = new PHINode(Type::Int1Ty, + BI->getCondition()->getName()+".pr", + BB->begin()); + for (PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if ((PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) && + PBI != BI && PBI->isConditional() && + PBI->getCondition() == BI->getCondition() && + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + bool CondIsTrue = PBI->getSuccessor(0) == BB; + NewPN->addIncoming(ConstantInt::get(Type::Int1Ty, + CondIsTrue), *PI); + } else { + NewPN->addIncoming(BI->getCondition(), *PI); + } + + BI->setCondition(NewPN); + // This will thread the branch. + return SimplifyCFG(BB) | true; + } + } + + // If this is a conditional branch in an empty block, and if any + // predecessors is a conditional branch to one of our destinations, + // fold the conditions into logical ops and one cond br. + if (&BB->front() == BI) { + int PBIOp, BIOp; + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { + PBIOp = BIOp = 0; + } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { + PBIOp = 0; BIOp = 1; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { + PBIOp = 1; BIOp = 0; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { + PBIOp = BIOp = 1; + } else { + PBIOp = BIOp = -1; + } + + // Check to make sure that the other destination of this branch + // isn't BB itself. If so, this is an infinite loop that will + // keep getting unwound. + if (PBIOp != -1 && PBI->getSuccessor(PBIOp) == BB) + PBIOp = BIOp = -1; + + // Do not perform this transformation if it would require + // insertion of a large number of select instructions. For targets + // without predication/cmovs, this is a big pessimization. + if (PBIOp != -1) { + BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + + unsigned NumPhis = 0; + for (BasicBlock::iterator II = CommonDest->begin(); + isa<PHINode>(II); ++II, ++NumPhis) { + if (NumPhis > 2) { + // Disable this xform. + PBIOp = -1; + break; + } + } + } + + // Finally, if everything is ok, fold the branches to logical ops. + if (PBIOp != -1) { + BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); + + // If OtherDest *is* BB, then this is a basic block with just + // a conditional branch in it, where one edge (OtherDesg) goes + // back to the block. We know that the program doesn't get + // stuck in the infinite loop, so the condition must be such + // that OtherDest isn't branched through. Forward to CommonDest, + // and avoid an infinite loop at optimizer time. + if (OtherDest == BB) + OtherDest = CommonDest; + + DOUT << "FOLDING BRs:" << *PBI->getParent() + << "AND: " << *BI->getParent(); + + // BI may have other predecessors. Because of this, we leave + // it alone, but modify PBI. + + // Make sure we get to CommonDest on True&True directions. + Value *PBICond = PBI->getCondition(); + if (PBIOp) + PBICond = BinaryOperator::createNot(PBICond, + PBICond->getName()+".not", + PBI); + Value *BICond = BI->getCondition(); + if (BIOp) + BICond = BinaryOperator::createNot(BICond, + BICond->getName()+".not", + PBI); + // Merge the conditions. + Value *Cond = + BinaryOperator::createOr(PBICond, BICond, "brmerge", PBI); + + // Modify PBI to branch on the new condition to the new dests. + PBI->setCondition(Cond); + PBI->setSuccessor(0, CommonDest); + PBI->setSuccessor(1, OtherDest); + + // OtherDest may have phi nodes. If so, add an entry from PBI's + // block that are identical to the entries for BI's block. + PHINode *PN; + for (BasicBlock::iterator II = OtherDest->begin(); + (PN = dyn_cast<PHINode>(II)); ++II) { + Value *V = PN->getIncomingValueForBlock(BB); + PN->addIncoming(V, PBI->getParent()); + } + + // We know that the CommonDest already had an edge from PBI to + // it. If it has PHIs though, the PHIs may have different + // entries for BB and PBI's BB. If so, insert a select to make + // them agree. + for (BasicBlock::iterator II = CommonDest->begin(); + (PN = dyn_cast<PHINode>(II)); ++II) { + Value * BIV = PN->getIncomingValueForBlock(BB); + unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN->getIncomingValue(PBBIdx); + if (BIV != PBIV) { + // Insert a select in PBI to pick the right value. + Value *NV = new SelectInst(PBICond, PBIV, BIV, + PBIV->getName()+".mux", PBI); + PN->setIncomingValue(PBBIdx, NV); + } + } + + DOUT << "INTO: " << *PBI->getParent(); + + // This basic block is probably dead. We know it has at least + // one fewer predecessor. + return SimplifyCFG(BB) | true; + } + } + } + } + } else if (isa<UnreachableInst>(BB->getTerminator())) { + // If there are any instructions immediately before the unreachable that can + // be removed, do so. + Instruction *Unreachable = BB->getTerminator(); + while (Unreachable != BB->begin()) { + BasicBlock::iterator BBI = Unreachable; + --BBI; + if (isa<CallInst>(BBI)) break; + // Delete this instruction + BB->getInstList().erase(BBI); + Changed = true; + } + + // If the unreachable instruction is the first in the block, take a gander + // at all of the predecessors of this instruction, and simplify them. + if (&BB->front() == Unreachable) { + std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB)); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + TerminatorInst *TI = Preds[i]->getTerminator(); + + if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + if (BI->isUnconditional()) { + if (BI->getSuccessor(0) == BB) { + new UnreachableInst(TI); + TI->eraseFromParent(); + Changed = true; + } + } else { + if (BI->getSuccessor(0) == BB) { + new BranchInst(BI->getSuccessor(1), BI); + BI->eraseFromParent(); + } else if (BI->getSuccessor(1) == BB) { + new BranchInst(BI->getSuccessor(0), BI); + BI->eraseFromParent(); + Changed = true; + } + } + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + if (SI->getSuccessor(i) == BB) { + BB->removePredecessor(SI->getParent()); + SI->removeCase(i); + --i; --e; + Changed = true; + } + // If the default value is unreachable, figure out the most popular + // destination and make it the default. + if (SI->getSuccessor(0) == BB) { + std::map<BasicBlock*, unsigned> Popularity; + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + Popularity[SI->getSuccessor(i)]++; + + // Find the most popular block. + unsigned MaxPop = 0; + BasicBlock *MaxBlock = 0; + for (std::map<BasicBlock*, unsigned>::iterator + I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { + if (I->second > MaxPop) { + MaxPop = I->second; + MaxBlock = I->first; + } + } + if (MaxBlock) { + // Make this the new default, allowing us to delete any explicit + // edges to it. + SI->setSuccessor(0, MaxBlock); + Changed = true; + + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from + // it. + if (isa<PHINode>(MaxBlock->begin())) + for (unsigned i = 0; i != MaxPop-1; ++i) + MaxBlock->removePredecessor(SI->getParent()); + + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) + if (SI->getSuccessor(i) == MaxBlock) { + SI->removeCase(i); + --i; --e; + } + } + } + } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) { + if (II->getUnwindDest() == BB) { + // Convert the invoke to a call instruction. This would be a good + // place to note that the call does not throw though. + BranchInst *BI = new BranchInst(II->getNormalDest(), II); + II->removeFromParent(); // Take out of symbol table + + // Insert the call now... + SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end()); + CallInst *CI = new CallInst(II->getCalledValue(), + &Args[0], Args.size(), + II->getName(), BI); + CI->setCallingConv(II->getCallingConv()); + // If the invoke produced a value, the Call does now instead. + II->replaceAllUsesWith(CI); + delete II; + Changed = true; + } + } + } + + // If this block is now dead, remove it. + if (pred_begin(BB) == pred_end(BB)) { + // We know there are no successors, so just nuke the block. + M->getBasicBlockList().erase(BB); + return true; + } + } + } + + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. + // + pred_iterator PI(pred_begin(BB)), PE(pred_end(BB)); + BasicBlock *OnlyPred = *PI++; + for (; PI != PE; ++PI) // Search all predecessors, see if they are all same + if (*PI != OnlyPred) { + OnlyPred = 0; // There are multiple different predecessors... + break; + } + + BasicBlock *OnlySucc = 0; + if (OnlyPred && OnlyPred != BB && // Don't break self loops + OnlyPred->getTerminator()->getOpcode() != Instruction::Invoke) { + // Check to see if there is only one distinct successor... + succ_iterator SI(succ_begin(OnlyPred)), SE(succ_end(OnlyPred)); + OnlySucc = BB; + for (; SI != SE; ++SI) + if (*SI != OnlySucc) { + OnlySucc = 0; // There are multiple distinct successors! + break; + } + } + + if (OnlySucc) { + DOUT << "Merging: " << *BB << "into: " << *OnlyPred; + + // Resolve any PHI nodes at the start of the block. They are all + // guaranteed to have exactly one entry if they exist, unless there are + // multiple duplicate (but guaranteed to be equal) entries for the + // incoming edges. This occurs when there are multiple edges from + // OnlyPred to OnlySucc. + // + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + BB->getInstList().pop_front(); // Delete the phi node. + } + + // Delete the unconditional branch from the predecessor. + OnlyPred->getInstList().pop_back(); + + // Move all definitions in the successor to the predecessor. + OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); + + // Make all PHI nodes that referred to BB now refer to Pred as their + // source. + BB->replaceAllUsesWith(OnlyPred); + + // Inherit predecessors name if it exists. + if (!OnlyPred->hasName()) + OnlyPred->takeName(BB); + + // Erase basic block from the function. + M->getBasicBlockList().erase(BB); + + return true; + } + + // Otherwise, if this block only has a single predecessor, and if that block + // is a conditional branch, see if we can hoist any code from this block up + // into our predecessor. + if (OnlyPred) + if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator())) + if (BI->isConditional()) { + // Get the other block. + BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB); + PI = pred_begin(OtherBB); + ++PI; + if (PI == pred_end(OtherBB)) { + // We have a conditional branch to two blocks that are only reachable + // from the condbr. We know that the condbr dominates the two blocks, + // so see if there is any identical code in the "then" and "else" + // blocks. If so, we can hoist it up to the branching block. + Changed |= HoistThenElseCodeToIf(BI); + } + } + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator())) + // Change br (X == 0 | X == 1), T, F into a switch instruction. + if (BI->isConditional() && isa<Instruction>(BI->getCondition())) { + Instruction *Cond = cast<Instruction>(BI->getCondition()); + // If this is a bunch of seteq's or'd together, or if it's a bunch of + // 'setne's and'ed together, collect them. + Value *CompVal = 0; + std::vector<ConstantInt*> Values; + bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values); + if (CompVal && CompVal->getType()->isInteger()) { + // There might be duplicate constants in the list, which the switch + // instruction can't handle, remove them now. + std::sort(Values.begin(), Values.end(), ConstantIntOrdering()); + Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); + + // Figure out which block is which destination. + BasicBlock *DefaultBB = BI->getSuccessor(1); + BasicBlock *EdgeBB = BI->getSuccessor(0); + if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); + + // Create the new switch instruction now. + SwitchInst *New = new SwitchInst(CompVal, DefaultBB,Values.size(),BI); + + // Add all of the 'cases' to the switch instruction. + for (unsigned i = 0, e = Values.size(); i != e; ++i) + New->addCase(Values[i], EdgeBB); + + // We added edges from PI to the EdgeBB. As such, if there were any + // PHI nodes in EdgeBB, they need entries to be added corresponding to + // the number of edges added. + for (BasicBlock::iterator BBI = EdgeBB->begin(); + isa<PHINode>(BBI); ++BBI) { + PHINode *PN = cast<PHINode>(BBI); + Value *InVal = PN->getIncomingValueForBlock(*PI); + for (unsigned i = 0, e = Values.size()-1; i != e; ++i) + PN->addIncoming(InVal, *PI); + } + + // Erase the old branch instruction. + (*PI)->getInstList().erase(BI); + + // Erase the potentially condition tree that was used to computed the + // branch condition. + ErasePossiblyDeadInstructionTree(Cond); + return true; + } + } + + // If there is a trivial two-entry PHI node in this basic block, and we can + // eliminate it, do so now. + if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) + if (PN->getNumIncomingValues() == 2) + Changed |= FoldTwoEntryPHINode(PN); + + return Changed; +} diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp new file mode 100644 index 0000000000..b545ad3fce --- /dev/null +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -0,0 +1,138 @@ +//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to ensure that functions have at most one return +// instruction in them. Additionally, it keeps track of which node is the new +// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode +// method will return a null pointer. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +using namespace llvm; + +char UnifyFunctionExitNodes::ID = 0; +static RegisterPass<UnifyFunctionExitNodes> +X("mergereturn", "Unify function exit nodes"); + +int UnifyFunctionExitNodes::stub; + +Pass *llvm::createUnifyFunctionExitNodesPass() { + return new UnifyFunctionExitNodes(); +} + +void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ + // We preserve the non-critical-edgeness property + AU.addPreservedID(BreakCriticalEdgesID); + // This is a cluster of orthogonal Transforms + AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreservedID(LowerSelectID); + AU.addPreservedID(LowerSwitchID); +} + +// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new +// BasicBlock, and converting all returns to unconditional branches to this +// new basic block. The singular exit node is returned. +// +// If there are no return stmts in the Function, a null pointer is returned. +// +bool UnifyFunctionExitNodes::runOnFunction(Function &F) { + // Loop over all of the blocks in a function, tracking all of the blocks that + // return. + // + std::vector<BasicBlock*> ReturningBlocks; + std::vector<BasicBlock*> UnwindingBlocks; + std::vector<BasicBlock*> UnreachableBlocks; + for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (isa<ReturnInst>(I->getTerminator())) + ReturningBlocks.push_back(I); + else if (isa<UnwindInst>(I->getTerminator())) + UnwindingBlocks.push_back(I); + else if (isa<UnreachableInst>(I->getTerminator())) + UnreachableBlocks.push_back(I); + + // Handle unwinding blocks first. + if (UnwindingBlocks.empty()) { + UnwindBlock = 0; + } else if (UnwindingBlocks.size() == 1) { + UnwindBlock = UnwindingBlocks.front(); + } else { + UnwindBlock = new BasicBlock("UnifiedUnwindBlock", &F); + new UnwindInst(UnwindBlock); + + for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(), + E = UnwindingBlocks.end(); I != E; ++I) { + BasicBlock *BB = *I; + BB->getInstList().pop_back(); // Remove the unwind insn + new BranchInst(UnwindBlock, BB); + } + } + + // Then unreachable blocks. + if (UnreachableBlocks.empty()) { + UnreachableBlock = 0; + } else if (UnreachableBlocks.size() == 1) { + UnreachableBlock = UnreachableBlocks.front(); + } else { + UnreachableBlock = new BasicBlock("UnifiedUnreachableBlock", &F); + new UnreachableInst(UnreachableBlock); + + for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(), + E = UnreachableBlocks.end(); I != E; ++I) { + BasicBlock *BB = *I; + BB->getInstList().pop_back(); // Remove the unreachable inst. + new BranchInst(UnreachableBlock, BB); + } + } + + // Now handle return blocks. + if (ReturningBlocks.empty()) { + ReturnBlock = 0; + return false; // No blocks return + } else if (ReturningBlocks.size() == 1) { + ReturnBlock = ReturningBlocks.front(); // Already has a single return block + return false; + } + + // Otherwise, we need to insert a new basic block into the function, add a PHI + // node (if the function returns a value), and convert all of the return + // instructions into unconditional branches. + // + BasicBlock *NewRetBlock = new BasicBlock("UnifiedReturnBlock", &F); + + PHINode *PN = 0; + if (F.getReturnType() != Type::VoidTy) { + // If the function doesn't return void... add a PHI node to the block... + PN = new PHINode(F.getReturnType(), "UnifiedRetVal"); + NewRetBlock->getInstList().push_back(PN); + } + new ReturnInst(PN, NewRetBlock); + + // Loop over all of the blocks, replacing the return instruction with an + // unconditional branch. + // + for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(), + E = ReturningBlocks.end(); I != E; ++I) { + BasicBlock *BB = *I; + + // Add an incoming element to the PHI node for every return instruction that + // is merging into this new block... + if (PN) PN->addIncoming(BB->getTerminator()->getOperand(0), BB); + + BB->getInstList().pop_back(); // Remove the return insn + new BranchInst(NewRetBlock, BB); + } + ReturnBlock = NewRetBlock; + return true; +} diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp new file mode 100644 index 0000000000..0b8c5c2796 --- /dev/null +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -0,0 +1,118 @@ +//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MapValue function, which is shared by various parts of +// the lib/Transforms/Utils library. +// +//===----------------------------------------------------------------------===// + +#include "ValueMapper.h" +#include "llvm/Constants.h" +#include "llvm/GlobalValue.h" +#include "llvm/Instruction.h" +using namespace llvm; + +Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { + Value *&VMSlot = VM[V]; + if (VMSlot) return VMSlot; // Does it exist in the map yet? + + // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the + // DenseMap. This includes any recursive calls to MapValue. + + // Global values do not need to be seeded into the ValueMap if they are using + // the identity mapping. + if (isa<GlobalValue>(V) || isa<InlineAsm>(V)) + return VMSlot = const_cast<Value*>(V); + + if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { + if (isa<ConstantInt>(C) || isa<ConstantFP>(C) || + isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) || + isa<UndefValue>(C)) + return VMSlot = C; // Primitive constants map directly + else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + Value *MV = MapValue(CA->getOperand(i), VM); + if (MV != CA->getOperand(i)) { + // This array must contain a reference to a global, make a new array + // and return it. + // + std::vector<Constant*> Values; + Values.reserve(CA->getNumOperands()); + for (unsigned j = 0; j != i; ++j) + Values.push_back(CA->getOperand(j)); + Values.push_back(cast<Constant>(MV)); + for (++i; i != e; ++i) + Values.push_back(cast<Constant>(MapValue(CA->getOperand(i), VM))); + return VM[V] = ConstantArray::get(CA->getType(), Values); + } + } + return VM[V] = C; + + } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { + for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { + Value *MV = MapValue(CS->getOperand(i), VM); + if (MV != CS->getOperand(i)) { + // This struct must contain a reference to a global, make a new struct + // and return it. + // + std::vector<Constant*> Values; + Values.reserve(CS->getNumOperands()); + for (unsigned j = 0; j != i; ++j) + Values.push_back(CS->getOperand(j)); + Values.push_back(cast<Constant>(MV)); + for (++i; i != e; ++i) + Values.push_back(cast<Constant>(MapValue(CS->getOperand(i), VM))); + return VM[V] = ConstantStruct::get(CS->getType(), Values); + } + } + return VM[V] = C; + + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + std::vector<Constant*> Ops; + for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) + Ops.push_back(cast<Constant>(MapValue(CE->getOperand(i), VM))); + return VM[V] = CE->getWithOperands(Ops); + } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) { + Value *MV = MapValue(CP->getOperand(i), VM); + if (MV != CP->getOperand(i)) { + // This vector value must contain a reference to a global, make a new + // vector constant and return it. + // + std::vector<Constant*> Values; + Values.reserve(CP->getNumOperands()); + for (unsigned j = 0; j != i; ++j) + Values.push_back(CP->getOperand(j)); + Values.push_back(cast<Constant>(MV)); + for (++i; i != e; ++i) + Values.push_back(cast<Constant>(MapValue(CP->getOperand(i), VM))); + return VM[V] = ConstantVector::get(Values); + } + } + return VM[V] = C; + + } else { + assert(0 && "Unknown type of constant!"); + } + } + + return 0; +} + +/// RemapInstruction - Convert the instruction operands from referencing the +/// current values into those specified by ValueMap. +/// +void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) { + for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { + const Value *Op = I->getOperand(op); + Value *V = MapValue(Op, ValueMap); + assert(V && "Referenced value not in value map!"); + I->setOperand(op, V); + } +} diff --git a/lib/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h new file mode 100644 index 0000000000..51319db55c --- /dev/null +++ b/lib/Transforms/Utils/ValueMapper.h @@ -0,0 +1,29 @@ +//===- ValueMapper.h - Interface shared by lib/Transforms/Utils -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MapValue interface which is used by various parts of +// the Transforms/Utils library to implement cloning and linking facilities. +// +//===----------------------------------------------------------------------===// + +#ifndef VALUEMAPPER_H +#define VALUEMAPPER_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class Value; + class Instruction; + typedef DenseMap<const Value *, Value *> ValueMapTy; + + Value *MapValue(const Value *V, ValueMapTy &VM); + void RemapInstruction(Instruction *I, ValueMapTy &VM); +} // End llvm namespace + +#endif |