diff options
author | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 |
commit | dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch) | |
tree | dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Transforms/InstCombine | |
parent | 220b921aed042f9e520c26cffd8282a94c66c3d5 (diff) | |
download | external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2 external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip |
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombine.h | 107 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp | 102 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 149 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 305 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp | 89 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCompares.cpp | 390 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 51 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 128 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombinePHI.cpp | 80 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSelect.cpp | 92 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineShifts.cpp | 91 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 85 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 115 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineWorklist.h | 7 | ||||
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 246 |
15 files changed, 1406 insertions, 631 deletions
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 822e146ac4..e04b1be53d 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -20,34 +20,38 @@ #include "llvm/Pass.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#define DEBUG_TYPE "instcombine" + namespace llvm { - class CallSite; - class DataLayout; - class TargetLibraryInfo; - class DbgDeclareInst; - class MemIntrinsic; - class MemSetInst; +class CallSite; +class DataLayout; +class TargetLibraryInfo; +class DbgDeclareInst; +class MemIntrinsic; +class MemSetInst; /// SelectPatternFlavor - We can match a variety of different patterns for /// select operations. enum SelectPatternFlavor { SPF_UNKNOWN = 0, - SPF_SMIN, SPF_UMIN, - SPF_SMAX, SPF_UMAX - //SPF_ABS - TODO. + SPF_SMIN, + SPF_UMIN, + SPF_SMAX, + SPF_UMAX + // SPF_ABS - TODO. }; /// getComplexity: Assign a complexity or rank value to LLVM Values... /// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst static inline unsigned getComplexity(Value *V) { if (isa<Instruction>(V)) { - if (BinaryOperator::isNeg(V) || - BinaryOperator::isFNeg(V) || + if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V)) return 3; return 4; } - if (isa<Argument>(V)) return 3; + if (isa<Argument>(V)) + return 3; return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; } @@ -60,18 +64,18 @@ static inline Constant *SubOne(Constant *C) { return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } - /// InstCombineIRInserter - This is an IRBuilder insertion helper that works /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter : public IRBuilderDefaultInserter<true> { InstCombineWorklist &Worklist; + public: InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} - void InsertHelper(Instruction *I, const Twine &Name, - BasicBlock *BB, BasicBlock::iterator InsertPt) const { + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, + BasicBlock::iterator InsertPt) const { IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); Worklist.Add(I); } @@ -79,13 +83,14 @@ public: /// InstCombiner - The -instcombine pass. class LLVM_LIBRARY_VISIBILITY InstCombiner - : public FunctionPass, - public InstVisitor<InstCombiner, Instruction*> { + : public FunctionPass, + public InstVisitor<InstCombiner, Instruction *> { const DataLayout *DL; TargetLibraryInfo *TLI; bool MadeIRChange; LibCallSimplifier *Simplifier; bool MinimizeSize; + public: /// Worklist - All of the instructions that need to be simplified. InstCombineWorklist Worklist; @@ -96,7 +101,7 @@ public: BuilderTy *Builder; static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(ID), DL(0), Builder(0) { + InstCombiner() : FunctionPass(ID), DL(nullptr), Builder(nullptr) { MinimizeSize = false; initializeInstCombinerPass(*PassRegistry::getPassRegistry()); } @@ -144,9 +149,9 @@ public: Instruction *visitAnd(BinaryOperator &I); Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS); Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); - Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C); - Instruction *visitOr (BinaryOperator &I); + Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, + Value *B, Value *C); + Instruction *visitOr(BinaryOperator &I); Instruction *visitXor(BinaryOperator &I); Instruction *visitShl(BinaryOperator &I); Instruction *visitAShr(BinaryOperator &I); @@ -156,12 +161,11 @@ public: Constant *RHSC); Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, - ConstantInt *AndCst = 0); + ConstantInt *AndCst = nullptr); Instruction *visitFCmpInst(FCmpInst &I); Instruction *visitICmpInst(ICmpInst &I); Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); - Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, - Instruction *LHS, + Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS, ConstantInt *RHS); Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); @@ -171,7 +175,7 @@ public: ICmpInst::Predicate Pred); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); - Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I); Instruction *commonCastTransforms(CastInst &CI); Instruction *commonPointerCastTransforms(CastInst &CI); @@ -188,9 +192,8 @@ public: Instruction *visitIntToPtr(IntToPtrInst &CI); Instruction *visitBitCast(BitCastInst &CI); Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI); - Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, - Instruction *FI); - Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); + Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI); + Instruction *FoldSelectIntoOp(SelectInst &SI, Value *, Value *); Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, Value *A, Value *B, Instruction &Outer, SelectPatternFlavor SPF2, Value *C); @@ -209,6 +212,7 @@ public: Instruction *visitStoreInst(StoreInst &SI); Instruction *visitBranchInst(BranchInst &BI); Instruction *visitSwitchInst(SwitchInst &SI); + Instruction *visitInsertValueInst(InsertValueInst &IV); Instruction *visitInsertElementInst(InsertElementInst &IE); Instruction *visitExtractElementInst(ExtractElementInst &EI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); @@ -216,21 +220,21 @@ public: Instruction *visitLandingPadInst(LandingPadInst &LI); // visitInstruction - Specify what to return for unhandled instructions... - Instruction *visitInstruction(Instruction &I) { return 0; } + Instruction *visitInstruction(Instruction &I) { return nullptr; } private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; - Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const; + Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, - SmallVectorImpl<Value*> &NewIndices); + SmallVectorImpl<Value *> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually /// results in any code being generated and is interesting to optimize out. If /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. - bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, + bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V, Type *Ty); Instruction *visitCallSite(CallSite CS); @@ -251,10 +255,10 @@ public: // in the program. Add the new instruction to the worklist. // Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { - assert(New && New->getParent() == 0 && + assert(New && !New->getParent() && "New instruction already inserted into a basic block!"); BasicBlock *BB = Old.getParent(); - BB->getInstList().insert(&Old, New); // Insert inst + BB->getInstList().insert(&Old, New); // Insert inst Worklist.Add(New); return New; } @@ -274,7 +278,7 @@ public: // modified. // Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { - Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. + Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. // If we are replacing the instruction with itself, this must be in a // segment of unreachable code, so just clobber the instruction. @@ -306,12 +310,12 @@ public: Worklist.Remove(&I); I.eraseFromParent(); MadeIRChange = true; - return 0; // Don't do anything with FI + return nullptr; // Don't do anything with FI } - void ComputeMaskedBits(Value *V, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0) const { - return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, DL, Depth); + void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + unsigned Depth = 0) const { + return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth); } bool MaskedValueIsZero(Value *V, const APInt &Mask, @@ -323,7 +327,6 @@ public: } private: - /// SimplifyAssociativeOrCommutative - This performs a few simplifications for /// operators which are associative or commutative. bool SimplifyAssociativeOrCommutative(BinaryOperator &I); @@ -337,12 +340,10 @@ private: /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth); - bool SimplifyDemandedBits(Use &U, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth=0); + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, + APInt &KnownOne, unsigned Depth); + bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero, + APInt &KnownOne, unsigned Depth = 0); /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, @@ -355,7 +356,9 @@ private: bool SimplifyDemandedInstructionBits(Instruction &Inst); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt& UndefElts, unsigned Depth = 0); + APInt &UndefElts, unsigned Depth = 0); + + Value *SimplifyVectorOp(BinaryOperator &Inst); // FoldOpIntoPhi - Given a binary operator, cast instruction, or select // which has a PHI node as operand #0, see if we can fold the instruction @@ -372,21 +375,19 @@ private: Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); - Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, ConstantInt *AndRHS, BinaryOperator &TheAnd); Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, bool isSub, Instruction &I); - Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside); + Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, + bool Inside); Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); Instruction *SimplifyMemTransfer(MemIntrinsic *MI); Instruction *SimplifyMemSet(MemSetInst *MI); - Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); /// Descale - Return a value X such that Val = X * Scale, or null if none. If @@ -394,8 +395,8 @@ private: Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); }; - - } // end namespace llvm. +#undef DEBUG_TYPE + #endif diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 97910c7b45..c37a9cf2ef 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -20,6 +20,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + namespace { /// Class representing coefficient of floating-point addend. @@ -112,12 +114,12 @@ namespace { /// class FAddend { public: - FAddend() { Val = 0; } + FAddend() { Val = nullptr; } Value *getSymVal (void) const { return Val; } const FAddendCoef &getCoef(void) const { return Coeff; } - bool isConstant() const { return Val == 0; } + bool isConstant() const { return Val == nullptr; } bool isZero() const { return Coeff.isZero(); } void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; } @@ -154,7 +156,7 @@ namespace { /// class FAddCombine { public: - FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} + FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {} Value *simplify(Instruction *FAdd); private: @@ -348,8 +350,8 @@ Value *FAddendCoef::getValue(Type *Ty) const { // unsigned FAddend::drillValueDownOneStep (Value *Val, FAddend &Addend0, FAddend &Addend1) { - Instruction *I = 0; - if (Val == 0 || !(I = dyn_cast<Instruction>(Val))) + Instruction *I = nullptr; + if (!Val || !(I = dyn_cast<Instruction>(Val))) return 0; unsigned Opcode = I->getOpcode(); @@ -359,16 +361,16 @@ unsigned FAddend::drillValueDownOneStep Value *Opnd0 = I->getOperand(0); Value *Opnd1 = I->getOperand(1); if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero()) - Opnd0 = 0; + Opnd0 = nullptr; if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero()) - Opnd1 = 0; + Opnd1 = nullptr; if (Opnd0) { if (!C0) Addend0.set(1, Opnd0); else - Addend0.set(C0, 0); + Addend0.set(C0, nullptr); } if (Opnd1) { @@ -376,7 +378,7 @@ unsigned FAddend::drillValueDownOneStep if (!C1) Addend.set(1, Opnd1); else - Addend.set(C1, 0); + Addend.set(C1, nullptr); if (Opcode == Instruction::FSub) Addend.negate(); } @@ -385,7 +387,7 @@ unsigned FAddend::drillValueDownOneStep return Opnd0 && Opnd1 ? 2 : 1; // Both operands are zero. Weird! - Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0); + Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr); return 1; } @@ -443,13 +445,13 @@ Value *FAddCombine::performFactorization(Instruction *I) { Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) - return 0; + return nullptr; bool isMpy = false; if (I0->getOpcode() == Instruction::FMul) isMpy = true; else if (I0->getOpcode() != Instruction::FDiv) - return 0; + return nullptr; Value *Opnd0_0 = I0->getOperand(0); Value *Opnd0_1 = I0->getOperand(1); @@ -461,8 +463,8 @@ Value *FAddCombine::performFactorization(Instruction *I) { // (x*y) +/- (x*z) x y z // (y/x) +/- (z/x) x y z // - Value *Factor = 0; - Value *AddSub0 = 0, *AddSub1 = 0; + Value *Factor = nullptr; + Value *AddSub0 = nullptr, *AddSub1 = nullptr; if (isMpy) { if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) @@ -481,7 +483,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { } if (!Factor) - return 0; + return nullptr; FastMathFlags Flags; Flags.setUnsafeAlgebra(); @@ -495,7 +497,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { const APFloat &F = CFP->getValueAPF(); if (!F.isNormal()) - return 0; + return nullptr; } else if (Instruction *II = dyn_cast<Instruction>(NewAddSub)) II->setFastMathFlags(Flags); @@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) { // Currently we are not able to handle vector type. if (I->getType()->isVectorTy()) - return 0; + return nullptr; assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); @@ -568,7 +570,7 @@ Value *FAddCombine::simplify(Instruction *I) { // been optimized into "I = Y - X" in the previous steps. // const FAddendCoef &CE = Opnd0.getCoef(); - return CE.isOne() ? Opnd0.getSymVal() : 0; + return CE.isOne() ? Opnd0.getSymVal() : nullptr; } // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] @@ -614,7 +616,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { // constant close to supper-expr(s) will potentially reveal some optimization // opportunities in super-expr(s). // - const FAddend *ConstAdd = 0; + const FAddend *ConstAdd = nullptr; // Simplified addends are placed <SimpVect>. AddendVect SimpVect; @@ -647,7 +649,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { if (T && T->getSymVal() == Val) { // Set null such that next iteration of the outer loop will not process // this addend again. - Addends[SameSymIdx] = 0; + Addends[SameSymIdx] = nullptr; SimpVect.push_back(T); } } @@ -661,7 +663,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { // Pop all addends being folded and push the resulting folded addend. SimpVect.resize(StartIdx); - if (Val != 0) { + if (Val) { if (!R.isZero()) { SimpVect.push_back(&R); } @@ -698,7 +700,7 @@ Value *FAddCombine::createNaryFAdd // unsigned InstrNeeded = calcInstrNumber(Opnds); if (InstrNeeded > InstrQuota) - return 0; + return nullptr; initCreateInstNum(); @@ -710,7 +712,7 @@ Value *FAddCombine::createNaryFAdd // N-ary addition has at most two instructions, and we don't need to worry // about tree-height when constructing the N-ary addition. - Value *LastVal = 0; + Value *LastVal = nullptr; bool LastValNeedNeg = false; // Iterate the addends, creating fadd/fsub using adjacent two addends. @@ -870,10 +872,10 @@ Value *FAddCombine::createAddendVal // static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy()) - return 0; + return nullptr; Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return 0; + if (!I) return nullptr; if (I->getOpcode() == Instruction::Mul) if ((CST = dyn_cast<Constant>(I->getOperand(1)))) @@ -884,7 +886,7 @@ static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST); return I->getOperand(0); } - return 0; + return nullptr; } @@ -918,6 +920,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) return ReplaceInstUsesWith(I, V); @@ -942,7 +947,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ZI->getSrcTy()->isIntegerTy(1)) return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - Value *XorLHS = 0; ConstantInt *XorRHS = 0; + Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr; if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); const APInt &RHSVal = CI->getValue(); @@ -974,7 +979,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { IntegerType *IT = cast<IntegerType>(I.getType()); APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne); if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); @@ -1042,11 +1047,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); if (LHSKnownZero != 0) { APInt RHSKnownOne(IT->getBitWidth(), 0); APInt RHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); // No bits in common -> bitwise or. if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) @@ -1174,7 +1179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // Check for (x & y) + (x ^ y) { - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && (match(LHS, m_And(m_Specific(A), m_Specific(B))) || match(LHS, m_And(m_Specific(B), m_Specific(A))))) @@ -1186,13 +1191,16 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateOr(A, B); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); @@ -1266,7 +1274,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { if (C1 == C2) { - Constant *Z1=0, *Z2=0; + Constant *Z1=nullptr, *Z2=nullptr; Value *A, *B, *C=C1; if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { Z1 = dyn_cast<Constant>(A1); A = A2; @@ -1290,7 +1298,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } @@ -1305,7 +1313,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. bool Swapped = false; - GEPOperator *GEP1 = 0, *GEP2 = 0; + GEPOperator *GEP1 = nullptr, *GEP2 = nullptr; // For now we require one side to be the base pointer "A" or a constant // GEP derived from it. @@ -1343,9 +1351,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // Avoid duplicating the arithmetic if GEP2 has non-constant indices and // multiple users. - if (GEP1 == 0 || - (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) - return 0; + if (!GEP1 || + (GEP2 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) + return nullptr; // Emit the offset of the GEP and an intptr_t. Value *Result = EmitGEPOffset(GEP1); @@ -1368,6 +1376,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) return ReplaceInstUsesWith(I, V); @@ -1393,7 +1404,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Constant *C = dyn_cast<Constant>(Op0)) { // C - ~X == X + (1+C) - Value *X = 0; + Value *X = nullptr; if (match(Op1, m_Not(m_Value(X)))) return BinaryOperator::CreateAdd(X, AddOne(C)); @@ -1451,9 +1462,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } if (Op1->hasOneUse()) { - Value *X = 0, *Y = 0, *Z = 0; - Constant *C = 0; - Constant *CI = 0; + Value *X = nullptr, *Y = nullptr, *Z = nullptr; + Constant *C = nullptr; + Constant *CI = nullptr; // (X - (Y - Z)) --> (X + (Z - Y)). if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) @@ -1532,12 +1543,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, Res); } - return 0; + return nullptr; } Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); @@ -1574,5 +1588,5 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); } - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 2c1bfc73fd..4f5d65ab78 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -20,6 +20,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// isFreeToInvert - Return true if the specified value is free to invert (apply /// ~ to). This happens in cases where the ~ can be eliminated. static inline bool isFreeToInvert(Value *V) { @@ -50,7 +52,7 @@ static inline Value *dyn_castNotVal(Value *V) { // Constants can be considered to be not'ed values... if (ConstantInt *C = dyn_cast<ConstantInt>(V)) return ConstantInt::get(C->getType(), ~C->getValue()); - return 0; + return nullptr; } /// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp @@ -123,7 +125,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, ConstantInt *AndRHS, BinaryOperator &TheAnd) { Value *X = Op->getOperand(0); - Constant *Together = 0; + Constant *Together = nullptr; if (!Op->isShift()) Together = ConstantExpr::getAnd(AndRHS, OpRHS); @@ -250,7 +252,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, } break; } - return 0; + return nullptr; } /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise @@ -332,12 +334,12 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, Instruction &I) { Instruction *LHSI = dyn_cast<Instruction>(LHS); if (!LHSI || LHSI->getNumOperands() != 2 || - !isa<ConstantInt>(LHSI->getOperand(1))) return 0; + !isa<ConstantInt>(LHSI->getOperand(1))) return nullptr; ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1)); switch (LHSI->getOpcode()) { - default: return 0; + default: return nullptr; case Instruction::And: if (ConstantExpr::getAnd(N, Mask) == Mask) { // If the AndRHS is a power of two minus one (0+1+), this is simple. @@ -357,7 +359,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, break; } } - return 0; + return nullptr; case Instruction::Or: case Instruction::Xor: // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 @@ -365,7 +367,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() && ConstantExpr::getAnd(N, Mask)->isNullValue()) break; - return 0; + return nullptr; } if (isSub) @@ -418,12 +420,12 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, ConstantInt *BCst = dyn_cast<ConstantInt>(B); ConstantInt *CCst = dyn_cast<ConstantInt>(C); bool icmp_eq = (SCC == ICmpInst::ICMP_EQ); - bool icmp_abit = (ACst != 0 && !ACst->isZero() && + bool icmp_abit = (ACst && !ACst->isZero() && ACst->getValue().isPowerOf2()); - bool icmp_bbit = (BCst != 0 && !BCst->isZero() && + bool icmp_bbit = (BCst && !BCst->isZero() && BCst->getValue().isPowerOf2()); unsigned result = 0; - if (CCst != 0 && CCst->isZero()) { + if (CCst && CCst->isZero()) { // if C is zero, then both A and B qualify as mask result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_Mask_AllZeroes | @@ -455,7 +457,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, FoldMskICmp_AMask_NotMixed) : (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed)); - } else if (ACst != 0 && CCst != 0 && + } else if (ACst && CCst && ConstantExpr::getAnd(ACst, CCst) == CCst) { result |= (icmp_eq ? FoldMskICmp_AMask_Mixed : FoldMskICmp_AMask_NotMixed); @@ -470,7 +472,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, FoldMskICmp_BMask_NotMixed) : (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_BMask_Mixed)); - } else if (BCst != 0 && CCst != 0 && + } else if (BCst && CCst && ConstantExpr::getAnd(BCst, CCst) == CCst) { result |= (icmp_eq ? FoldMskICmp_BMask_Mixed : FoldMskICmp_BMask_NotMixed); @@ -570,12 +572,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, Value *L11,*L12,*L21,*L22; // Check whether the icmp can be decomposed into a bit test. if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) { - L21 = L22 = L1 = 0; + L21 = L22 = L1 = nullptr; } else { // Look for ANDs in the LHS icmp. if (!L1->getType()->isIntegerTy()) { // You can icmp pointers, for example. They really aren't masks. - L11 = L12 = 0; + L11 = L12 = nullptr; } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) { // Any icmp can be viewed as being trivially masked; if it allows us to // remove one, it's worth it. @@ -585,7 +587,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, if (!L2->getType()->isIntegerTy()) { // You can icmp pointers, for example. They really aren't masks. - L21 = L22 = 0; + L21 = L22 = nullptr; } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) { L21 = L2; L22 = Constant::getAllOnesValue(L2->getType()); @@ -608,7 +610,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, } else { return 0; } - E = R2; R1 = 0; ok = true; + E = R2; R1 = nullptr; ok = true; } else if (R1->getType()->isIntegerTy()) { if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) { // As before, model no mask as a trivial mask if it'll let us do an @@ -665,11 +667,11 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, /// into a single (icmp(A & X) ==/!= Y) static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, llvm::InstCombiner::BuilderTy* Builder) { - Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, LHSCC, RHSCC); - if (mask == 0) return 0; + if (mask == 0) return nullptr; assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); @@ -722,9 +724,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // their actual values. This isn't strictly, necessary, just a "handle the // easy cases for now" decision. ConstantInt *BCst = dyn_cast<ConstantInt>(B); - if (BCst == 0) return 0; + if (!BCst) return nullptr; ConstantInt *DCst = dyn_cast<ConstantInt>(D); - if (DCst == 0) return 0; + if (!DCst) return nullptr; if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) { // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and @@ -763,11 +765,11 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // (icmp ne (A & B), B) & (icmp eq (A & D), D) // with B and D, having a single bit set ConstantInt *CCst = dyn_cast<ConstantInt>(C); - if (CCst == 0) return 0; + if (!CCst) return nullptr; if (LHSCC != NEWCC) CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) ); ConstantInt *ECst = dyn_cast<ConstantInt>(E); - if (ECst == 0) return 0; + if (!ECst) return nullptr; if (RHSCC != NEWCC) ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) ); ConstantInt* MCst = dyn_cast<ConstantInt>( @@ -776,13 +778,13 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // if there is a conflict we should actually return a false for the // whole construct if (!MCst->isZero()) - return 0; + return nullptr; Value *newOr1 = Builder->CreateOr(B, D); Value *newOr2 = ConstantExpr::getOr(CCst, ECst); Value *newAnd = Builder->CreateAnd(A, newOr1); return Builder->CreateICmp(NEWCC, newAnd, newOr2); } - return 0; + return nullptr; } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. @@ -811,7 +813,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); - if (LHSCst == 0 || RHSCst == 0) return 0; + if (!LHSCst || !RHSCst) return nullptr; if (LHSCst == RHSCst && LHSCC == RHSCC) { // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) @@ -835,7 +837,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC && LHS->hasOneUse() && RHS->hasOneUse()) { Value *V; - ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; + ConstantInt *AndCst, *SmallCst = nullptr, *BigCst = nullptr; // (trunc x) == C1 & (and x, CA) == C2 // (and x, CA) == C2 & (trunc x) == C1 @@ -866,14 +868,14 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // From here on, we only handle: // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. - if (Val != Val2) return 0; + if (Val != Val2) return nullptr; // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) - return 0; + return nullptr; // Make a constant range that's the intersection of the two icmp ranges. // If the intersection is empty, we know that the result is false. @@ -887,7 +889,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // We can't fold (ugt x, C) & (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) - return 0; + return nullptr; // Ensure that the larger constant is on the RHS. bool ShouldSwap; @@ -1016,7 +1018,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { break; } - return 0; + return nullptr; } /// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of @@ -1026,7 +1028,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) - return 0; + return nullptr; // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) @@ -1043,7 +1045,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); - return 0; + return nullptr; } Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); @@ -1096,7 +1098,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { } } - return 0; + return nullptr; } @@ -1104,6 +1106,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAndInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1198,7 +1203,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // If this is an integer truncation, and if the source is an 'and' with // immediate, transform it. This frequently occurs for bitfield accesses. { - Value *X = 0; ConstantInt *YC = 0; + Value *X = nullptr; ConstantInt *YC = nullptr; if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) { // Change: and (trunc (and X, YC) to T), C2 // into : and (trunc X to T), trunc(YC) & C2 @@ -1231,7 +1236,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } { - Value *A = 0, *B = 0, *C = 0, *D = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; // (A|B) & ~(A&B) -> A^B if (match(Op0, m_Or(m_Value(A), m_Value(B))) && match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && @@ -1339,7 +1344,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } { - Value *X = 0; + Value *X = nullptr; bool OpsSwapped = false; // Canonicalize SExt or Not to the LHS if (match(Op1, m_SExt(m_Value())) || @@ -1366,7 +1371,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { std::swap(Op0, Op1); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } /// CollectBSwapParts - Analyze the specified subexpression and see if it is @@ -1498,7 +1503,7 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { if (!ITy || ITy->getBitWidth() % 16 || // ByteMask only allows up to 32-byte values. ITy->getBitWidth() > 32*8) - return 0; // Can only bswap pairs of bytes. Can't do vectors. + return nullptr; // Can only bswap pairs of bytes. Can't do vectors. /// ByteValues - For each byte of the result, we keep track of which value /// defines each byte. @@ -1508,16 +1513,16 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { // Try to find all the pieces corresponding to the bswap. uint32_t ByteMask = ~0U >> (32-ByteValues.size()); if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) - return 0; + return nullptr; // Check to see if all of the bytes come from the same value. Value *V = ByteValues[0]; - if (V == 0) return 0; // Didn't find a byte? Must be zero. + if (!V) return nullptr; // Didn't find a byte? Must be zero. // Check to make sure that all of the bytes come from the same value. for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) - return 0; + return nullptr; Module *M = I.getParent()->getParent()->getParent(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); return CallInst::Create(F, V); @@ -1529,10 +1534,10 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { static Instruction *MatchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D) { // If A is not a select of -1/0, this cannot match. - Value *Cond = 0; + Value *Cond = nullptr; if (!match(A, m_SExt(m_Value(Cond))) || !Cond->getType()->isIntegerTy(1)) - return 0; + return nullptr; // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. if (match(D, m_Not(m_SExt(m_Specific(Cond))))) @@ -1545,7 +1550,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, return SelectInst::Create(Cond, C, D); if (match(B, m_SExt(m_Not(m_Specific(Cond))))) return SelectInst::Create(Cond, C, D); - return 0; + return nullptr; } /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. @@ -1566,8 +1571,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { LAnd->getOpcode() == Instruction::And && RAnd->getOpcode() == Instruction::And) { - Value *Mask = 0; - Value *Masked = 0; + Value *Mask = nullptr; + Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && isKnownToBeAPowerOfTwo(LAnd->getOperand(1)) && isKnownToBeAPowerOfTwo(RAnd->getOperand(1))) { @@ -1608,7 +1613,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (LHS->hasOneUse() || RHS->hasOneUse()) { // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1) // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1) - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) { B = Val; if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1)) @@ -1632,7 +1637,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). - if (LHSCst == 0 || RHSCst == 0) return 0; + if (!LHSCst || !RHSCst) return nullptr; if (LHSCst == RHSCst && LHSCC == RHSCC) { // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) @@ -1653,18 +1658,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // From here on, we only handle: // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. - if (Val != Val2) return 0; + if (Val != Val2) return nullptr; // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) - return 0; + return nullptr; // We can't fold (ugt x, C) | (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) - return 0; + return nullptr; // Ensure that the larger constant is on the RHS. bool ShouldSwap; @@ -1809,7 +1814,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } break; } - return 0; + return nullptr; } /// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of @@ -1837,7 +1842,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { isa<ConstantAggregateZero>(RHS->getOperand(1))) return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); - return 0; + return nullptr; } Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); @@ -1869,7 +1874,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder); } } - return 0; + return nullptr; } /// FoldOrWithConstants - This helper function folds: @@ -1884,27 +1889,30 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C) { ConstantInt *CI1 = dyn_cast<ConstantInt>(C); - if (!CI1) return 0; + if (!CI1) return nullptr; - Value *V1 = 0; - ConstantInt *CI2 = 0; - if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; + Value *V1 = nullptr; + ConstantInt *CI2 = nullptr; + if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return nullptr; APInt Xor = CI1->getValue() ^ CI2->getValue(); - if (!Xor.isAllOnesValue()) return 0; + if (!Xor.isAllOnesValue()) return nullptr; if (V1 == A || V1 == B) { Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); return BinaryOperator::CreateOr(NewOp, V1); } - return 0; + return nullptr; } Instruction *InstCombiner::visitOr(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyOrInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1918,7 +1926,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return &I; if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - ConstantInt *C1 = 0; Value *X = 0; + ConstantInt *C1 = nullptr; Value *X = nullptr; // (X & C1) | C2 --> (X | C2) & (C1|C2) // iff (C1 & C2) == 0. if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && @@ -1949,8 +1957,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return NV; } - Value *A = 0, *B = 0; - ConstantInt *C1 = 0, *C2 = 0; + Value *A = nullptr, *B = nullptr; + ConstantInt *C1 = nullptr, *C2 = nullptr; // (A | B) | C and A | (B | C) -> bswap if possible. // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. @@ -1981,10 +1989,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // (A & C)|(B & D) - Value *C = 0, *D = 0; + Value *C = nullptr, *D = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { - Value *V1 = 0, *V2 = 0; + Value *V1 = nullptr, *V2 = nullptr; C1 = dyn_cast<ConstantInt>(C); C2 = dyn_cast<ConstantInt>(D); if (C1 && C2) { // (A & C1)|(B & C2) @@ -2028,7 +2036,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. - ConstantInt *C3 = 0, *C4 = 0; + ConstantInt *C3 = nullptr, *C4 = nullptr; if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && (C3->getValue() & ~C1->getValue()) == 0 && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && @@ -2220,7 +2228,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // Since this OR statement hasn't been optimized further yet, we hope // that this transformation will allow the new ORs to be optimized. { - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (Op0->hasOneUse() && Op1->hasOneUse() && match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { @@ -2230,13 +2238,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } Instruction *InstCombiner::visitXor(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyXorInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -2494,5 +2505,5 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0bc3ac76c9..d4b583b3a7 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -22,6 +22,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumSimplified, "Number of library calls simplified"); /// getPromotedType - Return the specified type promoted as it would be to pass @@ -70,7 +72,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with // load/store. ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2)); - if (MemOpLength == 0) return 0; + if (!MemOpLength) return nullptr; // Source and destination pointer types are always "i8*" for intrinsic. See // if the size is something we can handle with a single primitive load/store. @@ -80,7 +82,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { assert(Size && "0-sized memory transferring should be removed already."); if (Size > 8 || (Size&(Size-1))) - return 0; // If not 1/2/4/8 bytes, exit. + return nullptr; // If not 1/2/4/8 bytes, exit. // Use an integer load+store unless we can find something better. unsigned SrcAddrSp = @@ -99,7 +101,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // dest address will be promotable. See if we can find a better type than the // integer datatype. Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); - MDNode *CopyMD = 0; + MDNode *CopyMD = nullptr; if (StrippedDest != MI->getArgOperand(0)) { Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); @@ -163,7 +165,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) - return 0; + return nullptr; uint64_t Len = LenC->getLimitedValue(); Alignment = MI->getAlignment(); assert(Len && "0-sized memory setting should be removed already."); @@ -191,7 +193,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return MI; } - return 0; + return nullptr; } /// visitCallInst - CallInst simplification. This mostly only handles folding @@ -233,7 +235,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // No other transformations apply to volatile transfers. if (MI->isVolatile()) - return 0; + return nullptr; // If we have a memmove and the source operation is a constant global, // then the source and dest pointers can't alias, so we can change this @@ -276,11 +278,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint64_t Size; if (getObjectSize(II->getArgOperand(0), Size, DL, TLI)) return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); - return 0; + return nullptr; } case Intrinsic::bswap: { Value *IIOperand = II->getArgOperand(0); - Value *X = 0; + Value *X = nullptr; // bswap(bswap(x)) -> x if (match(IIOperand, m_BSwap(m_Value(X)))) @@ -320,7 +322,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); + computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned TrailingZeros = KnownOne.countTrailingZeros(); APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); if ((Mask & KnownZero) == Mask) @@ -338,7 +340,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); + computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned LeadingZeros = KnownOne.countLeadingZeros(); APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); if ((Mask & KnownZero) == Mask) @@ -353,14 +355,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; if (LHSKnownNegative || LHSKnownPositive) { APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; if (LHSKnownNegative && RHSKnownNegative) { @@ -447,10 +449,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); // Get the largest possible values for each operand. APInt LHSMax = ~LHSKnownZero; @@ -554,6 +556,79 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + // Constant fold <A x Bi> << Ci. + // FIXME: We don't handle _dq because it's a shift of an i128, but is + // represented in the IR as <2 x i64>. A per element shift is wrong. + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: { + // Simplify if count is constant. To 0 if >= BitWidth, + // otherwise to shl/lshr. + auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1)); + auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1)); + if (!CDV && !CInt) + break; + ConstantInt *Count; + if (CDV) + Count = cast<ConstantInt>(CDV->getElementAsConstant(0)); + else + Count = CInt; + + auto Vec = II->getArgOperand(0); + auto VT = cast<VectorType>(Vec->getType()); + if (Count->getZExtValue() > + VT->getElementType()->getPrimitiveSizeInBits() - 1) + return ReplaceInstUsesWith( + CI, ConstantAggregateZero::get(Vec->getType())); + + bool isPackedShiftLeft = true; + switch (II->getIntrinsicID()) { + default : break; + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break; + } + + unsigned VWidth = VT->getNumElements(); + // Get a constant vector of the same type as the first operand. + auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); + if (isPackedShiftLeft) + return BinaryOperator::CreateShl(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); + + return BinaryOperator::CreateLShr(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); + } case Intrinsic::x86_sse41_pmovsxbw: case Intrinsic::x86_sse41_pmovsxwd: @@ -576,6 +651,153 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_sse4a_insertqi: { + // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top + // ones undef + // TODO: eventually we should lower this intrinsic to IR + if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) { + if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) { + if (CIWidth->equalsInt(64) && CIStart->isZero()) { + Value *Vec = II->getArgOperand(1); + Value *Undef = UndefValue::get(Vec->getType()); + const uint32_t Mask[] = { 0, 2 }; + return ReplaceInstUsesWith( + CI, + Builder->CreateShuffleVector( + Vec, Undef, ConstantDataVector::get( + II->getContext(), ArrayRef<uint32_t>(Mask)))); + + } else if (auto Source = + dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { + if (Source->hasOneUse() && + Source->getArgOperand(1) == II->getArgOperand(1)) { + // If the source of the insert has only one use and it's another + // insert (and they're both inserting from the same vector), try to + // bundle both together. + auto CISourceWidth = + dyn_cast<ConstantInt>(Source->getArgOperand(2)); + auto CISourceStart = + dyn_cast<ConstantInt>(Source->getArgOperand(3)); + if (CISourceStart && CISourceWidth) { + unsigned Start = CIStart->getZExtValue(); + unsigned Width = CIWidth->getZExtValue(); + unsigned End = Start + Width; + unsigned SourceStart = CISourceStart->getZExtValue(); + unsigned SourceWidth = CISourceWidth->getZExtValue(); + unsigned SourceEnd = SourceStart + SourceWidth; + unsigned NewStart, NewWidth; + bool ShouldReplace = false; + if (Start <= SourceStart && SourceStart <= End) { + NewStart = Start; + NewWidth = std::max(End, SourceEnd) - NewStart; + ShouldReplace = true; + } else if (SourceStart <= Start && Start <= SourceEnd) { + NewStart = SourceStart; + NewWidth = std::max(SourceEnd, End) - NewStart; + ShouldReplace = true; + } + + if (ShouldReplace) { + Constant *ConstantWidth = ConstantInt::get( + II->getArgOperand(2)->getType(), NewWidth, false); + Constant *ConstantStart = ConstantInt::get( + II->getArgOperand(3)->getType(), NewStart, false); + Value *Args[4] = { Source->getArgOperand(0), + II->getArgOperand(1), ConstantWidth, + ConstantStart }; + Module *M = CI.getParent()->getParent()->getParent(); + Value *F = + Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi); + return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args)); + } + } + } + } + } + } + break; + } + + case Intrinsic::x86_sse41_pblendvb: + case Intrinsic::x86_sse41_blendvps: + case Intrinsic::x86_sse41_blendvpd: + case Intrinsic::x86_avx_blendv_ps_256: + case Intrinsic::x86_avx_blendv_pd_256: + case Intrinsic::x86_avx2_pblendvb: { + // Convert blendv* to vector selects if the mask is constant. + // This optimization is convoluted because the intrinsic is defined as + // getting a vector of floats or doubles for the ps and pd versions. + // FIXME: That should be changed. + Value *Mask = II->getArgOperand(2); + if (auto C = dyn_cast<ConstantDataVector>(Mask)) { + auto Tyi1 = Builder->getInt1Ty(); + auto SelectorType = cast<VectorType>(Mask->getType()); + auto EltTy = SelectorType->getElementType(); + unsigned Size = SelectorType->getNumElements(); + unsigned BitWidth = + EltTy->isFloatTy() + ? 32 + : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth()); + assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) && + "Wrong arguments for variable blend intrinsic"); + SmallVector<Constant *, 32> Selectors; + for (unsigned I = 0; I < Size; ++I) { + // The intrinsics only read the top bit + uint64_t Selector; + if (BitWidth == 8) + Selector = C->getElementAsInteger(I); + else + Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue(); + Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1))); + } + auto NewSelector = ConstantVector::get(Selectors); + return SelectInst::Create(NewSelector, II->getArgOperand(1), + II->getArgOperand(0), "blendv"); + } else { + break; + } + } + + case Intrinsic::x86_avx_vpermilvar_ps: + case Intrinsic::x86_avx_vpermilvar_ps_256: + case Intrinsic::x86_avx_vpermilvar_pd: + case Intrinsic::x86_avx_vpermilvar_pd_256: { + // Convert vpermil* to shufflevector if the mask is constant. + Value *V = II->getArgOperand(1); + unsigned Size = cast<VectorType>(V->getType())->getNumElements(); + assert(Size == 8 || Size == 4 || Size == 2); + uint32_t Indexes[8]; + if (auto C = dyn_cast<ConstantDataVector>(V)) { + // The intrinsics only read one or two bits, clear the rest. + for (unsigned I = 0; I < Size; ++I) { + uint32_t Index = C->getElementAsInteger(I) & 0x3; + if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd || + II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) + Index >>= 1; + Indexes[I] = Index; + } + } else if (isa<ConstantAggregateZero>(V)) { + for (unsigned I = 0; I < Size; ++I) + Indexes[I] = 0; + } else { + break; + } + // The _256 variants are a bit trickier since the mask bits always index + // into the corresponding 128 half. In order to convert to a generic + // shuffle, we have to make that explicit. + if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 || + II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) { + for (unsigned I = Size / 2; I < Size; ++I) + Indexes[I] += Size / 2; + } + auto NewC = + ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size)); + auto V1 = II->getArgOperand(0); + auto V2 = UndefValue::get(V1->getType()); + auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC); + return ReplaceInstUsesWith(CI, Shuffle); + } + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) { @@ -586,8 +808,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { bool AllEltsOk = true; for (unsigned i = 0; i != 16; ++i) { Constant *Elt = Mask->getAggregateElement(i); - if (Elt == 0 || - !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) { + if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) { AllEltsOk = false; break; } @@ -612,7 +833,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. - if (ExtractedElts[Idx] == 0) { + if (!ExtractedElts[Idx]) { ExtractedElts[Idx] = Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, Builder->getInt32(Idx&15)); @@ -655,8 +876,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: { + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: { Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); @@ -667,7 +888,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Check for constant LHS & RHS - in this case we just simplify. bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu || - II->getIntrinsicID() == Intrinsic::arm64_neon_umull); + II->getIntrinsicID() == Intrinsic::aarch64_neon_umull); VectorType *NewVT = cast<VectorType>(II->getType()); if (Constant *CV0 = dyn_cast<Constant>(Arg0)) { if (Constant *CV1 = dyn_cast<Constant>(Arg1)) { @@ -776,14 +997,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, // strcat_chk and strncat_chk. Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) { - if (CI->getCalledFunction() == 0) return 0; + if (!CI->getCalledFunction()) return nullptr; if (Value *With = Simplifier->optimizeCall(CI)) { ++NumSimplified; return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With); } - return 0; + return nullptr; } static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { @@ -792,35 +1013,35 @@ static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { Value *Underlying = TrampMem->stripPointerCasts(); if (Underlying != TrampMem && (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem)) - return 0; + return nullptr; if (!isa<AllocaInst>(Underlying)) - return 0; + return nullptr; - IntrinsicInst *InitTrampoline = 0; + IntrinsicInst *InitTrampoline = nullptr; for (User *U : TrampMem->users()) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); if (!II) - return 0; + return nullptr; if (II->getIntrinsicID() == Intrinsic::init_trampoline) { if (InitTrampoline) // More than one init_trampoline writes to this value. Give up. - return 0; + return nullptr; InitTrampoline = II; continue; } if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) // Allow any number of calls to adjust.trampoline. continue; - return 0; + return nullptr; } // No call to init.trampoline found. if (!InitTrampoline) - return 0; + return nullptr; // Check that the alloca is being used in the expected way. if (InitTrampoline->getOperand(0) != TrampMem) - return 0; + return nullptr; return InitTrampoline; } @@ -837,9 +1058,9 @@ static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, II->getOperand(0) == TrampMem) return II; if (Inst->mayWriteToMemory()) - return 0; + return nullptr; } - return 0; + return nullptr; } // Given a call to llvm.adjust.trampoline, find and return the corresponding @@ -851,7 +1072,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); if (!AdjustTramp || AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) - return 0; + return nullptr; Value *TrampMem = AdjustTramp->getOperand(0); @@ -859,7 +1080,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { return IT; if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) return IT; - return 0; + return nullptr; } // visitCallSite - Improvements for call and invoke instructions. @@ -874,7 +1095,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // arguments of the call/invoke. Value *Callee = CS.getCalledValue(); if (!isa<Function>(Callee) && transformConstExprCastCall(CS)) - return 0; + return nullptr; if (Function *CalleeF = dyn_cast<Function>(Callee)) // If the call and callee calling conventions don't match, this call must @@ -899,7 +1120,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // change the callee to a null pointer. cast<InvokeInst>(OldCall)->setCalledFunction( Constant::getNullValue(CalleeF->getType())); - return 0; + return nullptr; } if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { @@ -911,7 +1132,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { if (isa<InvokeInst>(CS.getInstruction())) { // Can't remove an invoke because we cannot change the CFG. - return 0; + return nullptr; } // This instruction is not reachable, just remove it. We insert a store to @@ -959,7 +1180,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { if (I) return EraseInstFromFunction(*I); } - return Changed ? CS.getInstruction() : 0; + return Changed ? CS.getInstruction() : nullptr; } // transformConstExprCastCall - If the callee is a constexpr cast of a function, @@ -968,7 +1189,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { bool InstCombiner::transformConstExprCastCall(CallSite CS) { Function *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); - if (Callee == 0) + if (!Callee) return false; Instruction *Caller = CS.getInstruction(); const AttributeSet &CallerPAL = CS.getAttributes(); @@ -1044,7 +1265,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1, Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); - if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || DL == 0) + if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL) return false; Type *CurElTy = ActTy->getPointerElementType(); @@ -1235,7 +1456,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // If the call already has the 'nest' attribute somewhere then give up - // otherwise 'nest' would occur twice after splicing in the chain. if (Attrs.hasAttrSomewhere(Attribute::Nest)) - return 0; + return nullptr; assert(Tramp && "transformCallThroughTrampoline called with incorrect CallSite."); @@ -1247,7 +1468,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, const AttributeSet &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; - Type *NestTy = 0; + Type *NestTy = nullptr; AttributeSet NestAttr; // Look for a parameter marked with the 'nest' attribute. diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index c2b862a5ff..356803ad7c 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -19,6 +19,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear /// expression. If so, decompose it, returning some value X, such that Val is /// X*Scale+Offset. @@ -79,7 +81,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { // This requires DataLayout to get the alloca alignment and size information. - if (!DL) return 0; + if (!DL) return nullptr; PointerType *PTy = cast<PointerType>(CI.getType()); @@ -89,26 +91,26 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // Get the type really allocated and the type casted to. Type *AllocElTy = AI.getAllocatedType(); Type *CastElTy = PTy->getElementType(); - if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; + if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr; unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy); unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy); - if (CastElTyAlign < AllocElTyAlign) return 0; + if (CastElTyAlign < AllocElTyAlign) return nullptr; // If the allocation has multiple uses, only promote it if we are strictly // increasing the alignment of the resultant allocation. If we keep it the // same, we open the door to infinite loops of various kinds. - if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; + if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr; uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy); uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy); - if (CastElTySize == 0 || AllocElTySize == 0) return 0; + if (CastElTySize == 0 || AllocElTySize == 0) return nullptr; // If the allocation has multiple uses, only promote it if we're not // shrinking the amount of memory being allocated. uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy); uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy); - if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr; // See if we can satisfy the modulus by pulling a scale out of the array // size argument. @@ -120,10 +122,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || - (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; + (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr; unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; - Value *Amt = 0; + Value *Amt = nullptr; if (Scale == 1) { Amt = NumElements; } else { @@ -141,6 +143,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); + New->setUsedWithInAlloca(AI.isUsedWithInAlloca()); // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it @@ -169,7 +172,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, // Otherwise, it must be an instruction. Instruction *I = cast<Instruction>(V); - Instruction *Res = 0; + Instruction *Res = nullptr; unsigned Opc = I->getOpcode(); switch (Opc) { case Instruction::Add: @@ -245,11 +248,11 @@ isEliminableCastPair( Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opcode); Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(SrcTy) : 0; + DL->getIntPtrType(SrcTy) : nullptr; Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(MidTy) : 0; + DL->getIntPtrType(MidTy) : nullptr; Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(DstTy) : 0; + DL->getIntPtrType(DstTy) : nullptr; unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, SrcIntPtrTy, MidIntPtrTy, DstIntPtrTy); @@ -318,7 +321,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { return NV; } - return 0; + return nullptr; } /// CanEvaluateTruncated - Return true if we can evaluate the specified @@ -470,7 +473,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { } // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. - Value *A = 0; ConstantInt *Cst = 0; + Value *A = nullptr; ConstantInt *Cst = nullptr; if (Src->hasOneUse() && match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { // We have three types to worry about here, the type of A, the source of @@ -502,7 +505,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { ConstantExpr::getTrunc(Cst, CI.getType())); } - return 0; + return nullptr; } /// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations @@ -550,7 +553,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); + computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? @@ -598,8 +601,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS); + computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS); + computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS); if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { APInt KnownBits = KnownZeroLHS | KnownOneLHS; @@ -627,7 +630,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, } } - return 0; + return nullptr; } /// CanEvaluateZExtd - Determine if the specified value can be computed in the @@ -758,7 +761,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // If this zero extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa<TruncInst>(CI.user_back())) - return 0; + return nullptr; // If one of the common conversion will work, do it. if (Instruction *Result = commonCastTransforms(CI)) @@ -883,7 +886,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); } - return 0; + return nullptr; } /// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations @@ -918,7 +921,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ unsigned BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Op0, KnownZero, KnownOne); + computeKnownBits(Op0, KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { @@ -967,7 +970,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { } } - return 0; + return nullptr; } /// CanEvaluateSExtd - Return true if we can take the specified value @@ -1039,7 +1042,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // If this sign extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this sext. if (CI.hasOneUse() && isa<TruncInst>(CI.user_back())) - return 0; + return nullptr; if (Instruction *I = commonCastTransforms(CI)) return I; @@ -1107,9 +1110,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // into: // %a = shl i32 %i, 30 // %d = ashr i32 %a, 30 - Value *A = 0; + Value *A = nullptr; // TODO: Eventually this could be subsumed by EvaluateInDifferentType. - ConstantInt *BA = 0, *CA = 0; + ConstantInt *BA = nullptr, *CA = nullptr; if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)), m_ConstantInt(CA))) && BA == CA && A->getType() == CI.getType()) { @@ -1121,7 +1124,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { return BinaryOperator::CreateAShr(A, ShAmtV); } - return 0; + return nullptr; } @@ -1133,7 +1136,7 @@ static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); if (!losesInfo) return ConstantFP::get(CFP->getContext(), F); - return 0; + return nullptr; } /// LookThroughFPExtensions - If this is an fp extension instruction, look @@ -1345,7 +1348,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitFPExt(CastInst &CI) { @@ -1354,7 +1357,7 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) { Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); - if (OpI == 0) + if (!OpI) return commonCastTransforms(FI); // fptoui(uitofp(X)) --> X @@ -1374,7 +1377,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); - if (OpI == 0) + if (!OpI) return commonCastTransforms(FI); // fptosi(sitofp(X)) --> X @@ -1421,7 +1424,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - return 0; + return nullptr; } /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) @@ -1520,7 +1523,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // there yet. if (SrcTy->getElementType()->getPrimitiveSizeInBits() != DestTy->getElementType()->getPrimitiveSizeInBits()) - return 0; + return nullptr; SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); InVal = IC.Builder->CreateBitCast(InVal, SrcTy); @@ -1598,7 +1601,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, ElementIndex = Elements.size() - ElementIndex - 1; // Fail if multiple elements are inserted into this slot. - if (Elements[ElementIndex] != 0) + if (Elements[ElementIndex]) return false; Elements[ElementIndex] = V; @@ -1638,7 +1641,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, if (!V->hasOneUse()) return false; Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return false; + if (!I) return false; switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: @@ -1659,7 +1662,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); - if (CI == 0) return false; + if (!CI) return false; Shift += CI->getZExtValue(); if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; return CollectInsertionElements(I->getOperand(0), Shift, @@ -1687,7 +1690,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { // We need to know the target byte order to perform this optimization. - if (!IC.getDataLayout()) return 0; + if (!IC.getDataLayout()) return nullptr; VectorType *DestVecTy = cast<VectorType>(CI.getType()); Value *IntInput = CI.getOperand(0); @@ -1695,14 +1698,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); if (!CollectInsertionElements(IntInput, 0, Elements, DestVecTy->getElementType(), IC)) - return 0; + return nullptr; // If we succeeded, we know that all of the element are specified by Elements // or are zero if Elements has a null entry. Recast this as a set of // insertions. Value *Result = Constant::getNullValue(CI.getType()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) { - if (Elements[i] == 0) continue; // Unset element. + if (!Elements[i]) continue; // Unset element. Result = IC.Builder->CreateInsertElement(Result, Elements[i], IC.Builder->getInt32(i)); @@ -1716,14 +1719,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// bitcast. The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ // We need to know the target byte order to perform this optimization. - if (!IC.getDataLayout()) return 0; + if (!IC.getDataLayout()) return nullptr; Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); // If this is a bitcast from int to float, check to see if the int is an // extraction from a vector. - Value *VecInput = 0; + Value *VecInput = nullptr; // bitcast(trunc(bitcast(somevector))) if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && isa<VectorType>(VecInput->getType())) { @@ -1747,7 +1750,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } // bitcast(trunc(lshr(bitcast(somevector), cst)) - ConstantInt *ShAmt = 0; + ConstantInt *ShAmt = nullptr; if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), m_ConstantInt(ShAmt)))) && isa<VectorType>(VecInput->getType())) { @@ -1769,7 +1772,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } - return 0; + return nullptr; } Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8c0ad52598..02e8bf1013 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -24,6 +24,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + static ConstantInt *getOne(Constant *C) { return ConstantInt::get(cast<IntegerType>(C->getType()), 1); } @@ -218,15 +220,15 @@ Instruction *InstCombiner:: FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { // We need TD information to know the pointer size unless this is inbounds. - if (!GEP->isInBounds() && DL == 0) - return 0; + if (!GEP->isInBounds() && !DL) + return nullptr; Constant *Init = GV->getInitializer(); if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) - return 0; + return nullptr; uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); - if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. + if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays. // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. @@ -236,7 +238,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, !isa<ConstantInt>(GEP->getOperand(1)) || !cast<ConstantInt>(GEP->getOperand(1))->isZero() || isa<Constant>(GEP->getOperand(2))) - return 0; + return nullptr; // Check that indices after the variable are constants and in-range for the // type they index. Collect the indices. This is typically for arrays of @@ -246,18 +248,18 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Type *EltTy = Init->getType()->getArrayElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (Idx == 0) return 0; // Variable index. + if (!Idx) return nullptr; // Variable index. uint64_t IdxVal = Idx->getZExtValue(); - if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. + if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. if (StructType *STy = dyn_cast<StructType>(EltTy)) EltTy = STy->getElementType(IdxVal); else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { - if (IdxVal >= ATy->getNumElements()) return 0; + if (IdxVal >= ATy->getNumElements()) return nullptr; EltTy = ATy->getElementType(); } else { - return 0; // Unknown type. + return nullptr; // Unknown type. } LaterIndices.push_back(IdxVal); @@ -296,7 +298,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { Constant *Elt = Init->getAggregateElement(i); - if (Elt == 0) return 0; + if (!Elt) return nullptr; // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) @@ -321,7 +323,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. - if (!isa<ConstantInt>(C)) return 0; + if (!isa<ConstantInt>(C)) return nullptr; // Otherwise, we know if the comparison is true or false for this element, // update our state machines. @@ -375,7 +377,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && FalseRangeEnd == Overdefined) - return 0; + return nullptr; } // Now that we've scanned the entire array, emit our new comparison(s). We @@ -467,7 +469,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 { - Type *Ty = 0; + Type *Ty = nullptr; // Look for an appropriate type: // - The type of Idx if the magic fits @@ -480,7 +482,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, else if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); - if (Ty != 0) { + if (Ty) { Value *V = Builder->CreateIntCast(Idx, Ty, false); V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); @@ -488,7 +490,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, } } - return 0; + return nullptr; } @@ -533,7 +535,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // If there are no variable indices, we must have a constant offset, just // evaluate it the general way. - if (i == e) return 0; + if (i == e) return nullptr; Value *VariableIdx = GEP->getOperand(i); // Determine the scale factor of the variable element. For example, this is @@ -543,7 +545,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // Verify that there are no other variable indices. If so, emit the hard way. for (++i, ++GTI; i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (!CI) return 0; + if (!CI) return nullptr; // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; @@ -587,7 +589,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // multiple of the variable scale. int64_t NewOffs = Offset / (int64_t)VariableScale; if (Offset != NewOffs*(int64_t)VariableScale) - return 0; + return nullptr; // Okay, we can do this evaluation. Start by converting the index to intptr. if (VariableIdx->getType() != IntPtrTy) @@ -608,7 +610,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be // the maximum signed value for the pointer type. if (ICmpInst::isSigned(Cond)) - return 0; + return nullptr; // Look through bitcasts. if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) @@ -623,7 +625,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); // If not, synthesize the offset the hard way. - if (Offset == 0) + if (!Offset) Offset = EmitGEPOffset(GEPLHS); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, Constant::getNullValue(Offset->getType())); @@ -661,7 +663,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // Otherwise, the base pointers are different and the indices are // different, bail out. - return 0; + return nullptr; } // If one of the GEPs has all zero indices, recurse. @@ -729,7 +731,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); } } - return 0; + return nullptr; } /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". @@ -812,11 +814,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // if it finds it. bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) - return 0; + return nullptr; if (DivRHS->isZero()) - return 0; // The ProdOV computation fails on divide by zero. + return nullptr; // The ProdOV computation fails on divide by zero. if (DivIsSigned && DivRHS->isAllOnesValue()) - return 0; // The overflow computation also screws up here + return nullptr; // The overflow computation also screws up here if (DivRHS->isOne()) { // This eliminates some funny cases with INT_MIN. ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. @@ -850,7 +852,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // overflow variable is set to 0 if it's corresponding bound variable is valid // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. int LoOverflow = 0, HiOverflow = 0; - Constant *LoBound = 0, *HiBound = 0; + Constant *LoBound = nullptr, *HiBound = nullptr; if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) @@ -890,7 +892,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) - HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN + HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN } } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos // e.g. X/-5 op 3 --> [-19, -14) @@ -964,20 +966,20 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, uint32_t TypeBits = CmpRHSV.getBitWidth(); uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); if (ShAmtVal >= TypeBits || ShAmtVal == 0) - return 0; + return nullptr; if (!ICI.isEquality()) { // If we have an unsigned comparison and an ashr, we can't simplify this. // Similarly for signed comparisons with lshr. if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) - return 0; + return nullptr; // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv // by a power of 2. Since we already have logic to simplify these, // transform to div and then simplify the resultant comparison. if (Shr->getOpcode() == Instruction::AShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1)) - return 0; + return nullptr; // Revisit the shift (to delete it). Worklist.Add(Shr); @@ -994,7 +996,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, // If the builder folded the binop, just return it. BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); - if (TheDiv == 0) + if (!TheDiv) return &ICI; // Otherwise, fold this div/compare. @@ -1037,7 +1039,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, Mask, Shr->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); } - return 0; + return nullptr; } @@ -1056,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne); + computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne); // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { @@ -1181,10 +1183,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // access. BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); if (Shift && !Shift->isShift()) - Shift = 0; + Shift = nullptr; ConstantInt *ShAmt; - ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; + ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr; // This seemingly simple opportunity to fold away a shift turns out to // be rather complicated. See PR17827 @@ -1777,7 +1779,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } } - return 0; + return nullptr; } /// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). @@ -1794,7 +1796,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // integer type is the same size as the pointer type. if (DL && LHSCI->getOpcode() == Instruction::PtrToInt && DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { - Value *RHSOp = 0; + Value *RHSOp = nullptr; if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) { @@ -1812,7 +1814,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Enforce this. if (LHSCI->getOpcode() != Instruction::ZExt && LHSCI->getOpcode() != Instruction::SExt) - return 0; + return nullptr; bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; bool isSignedCmp = ICI.isSigned(); @@ -1821,12 +1823,12 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Not an extension from the same type? RHSCIOp = CI->getOperand(0); if (RHSCIOp->getType() != LHSCIOp->getType()) - return 0; + return nullptr; // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. if (CI->getOpcode() != LHSCI->getOpcode()) - return 0; + return nullptr; // Deal with equality cases early. if (ICI.isEquality()) @@ -1844,7 +1846,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // If we aren't dealing with a constant on the RHS, exit early ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1)); if (!CI) - return 0; + return nullptr; // Compute the constant that would happen if we truncated to SrcTy then // reextended to DestTy. @@ -1873,7 +1875,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // by SimplifyICmpInst, so only deal with the tricky case. if (isSignedCmp || !isSignedExt) - return 0; + return nullptr; // Evaluate the comparison for LT (we invert for GT below). LE and GE cases // should have been folded away previously and not enter in here. @@ -1909,12 +1911,12 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // In order to eliminate the add-with-constant, the compare can be its only // use. Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); - if (!AddWithCst->hasOneUse()) return 0; + if (!AddWithCst->hasOneUse()) return nullptr; // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. - if (!CI2->getValue().isPowerOf2()) return 0; + if (!CI2->getValue().isPowerOf2()) return nullptr; unsigned NewWidth = CI2->getValue().countTrailingZeros(); - if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; + if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr; // The width of the new add formed is 1 more than the bias. ++NewWidth; @@ -1922,7 +1924,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // Check to see that CI1 is an all-ones value with NewWidth bits. if (CI1->getBitWidth() == NewWidth || CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) - return 0; + return nullptr; // This is only really a signed overflow check if the inputs have been // sign-extended; check for that condition. For example, if CI2 is 2^31 and @@ -1930,7 +1932,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; if (IC.ComputeNumSignBits(A) < NeededSignBits || IC.ComputeNumSignBits(B) < NeededSignBits) - return 0; + return nullptr; // In order to replace the original add with a narrower // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant @@ -1946,8 +1948,8 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // original add had another add which was then immediately truncated, we // could still do the transformation. TruncInst *TI = dyn_cast<TruncInst>(U); - if (TI == 0 || - TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; + if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth) + return nullptr; } // If the pattern matches, truncate the inputs to the narrower type and @@ -1983,11 +1985,11 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, InstCombiner &IC) { // Don't bother doing this transformation for pointers, don't do it for // vectors. - if (!isa<IntegerType>(OrigAddV->getType())) return 0; + if (!isa<IntegerType>(OrigAddV->getType())) return nullptr; // If the add is a constant expr, then we don't bother transforming it. Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); - if (OrigAdd == 0) return 0; + if (!OrigAdd) return nullptr; Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); @@ -2008,6 +2010,236 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, return ExtractValueInst::Create(Call, 1, "uadd.overflow"); } +/// \brief Recognize and process idiom involving test for multiplication +/// overflow. +/// +/// The caller has matched a pattern of the form: +/// I = cmp u (mul(zext A, zext B), V +/// The function checks if this is a test for overflow and if so replaces +/// multiplication with call to 'mul.with.overflow' intrinsic. +/// +/// \param I Compare instruction. +/// \param MulVal Result of 'mult' instruction. It is one of the arguments of +/// the compare instruction. Must be of integer type. +/// \param OtherVal The other argument of compare instruction. +/// \returns Instruction which must replace the compare instruction, NULL if no +/// replacement required. +static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal, + Value *OtherVal, InstCombiner &IC) { + assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal); + assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal); + assert(isa<IntegerType>(MulVal->getType())); + Instruction *MulInstr = cast<Instruction>(MulVal); + assert(MulInstr->getOpcode() == Instruction::Mul); + + Instruction *LHS = cast<Instruction>(MulInstr->getOperand(0)), + *RHS = cast<Instruction>(MulInstr->getOperand(1)); + assert(LHS->getOpcode() == Instruction::ZExt); + assert(RHS->getOpcode() == Instruction::ZExt); + Value *A = LHS->getOperand(0), *B = RHS->getOperand(0); + + // Calculate type and width of the result produced by mul.with.overflow. + Type *TyA = A->getType(), *TyB = B->getType(); + unsigned WidthA = TyA->getPrimitiveSizeInBits(), + WidthB = TyB->getPrimitiveSizeInBits(); + unsigned MulWidth; + Type *MulType; + if (WidthB > WidthA) { + MulWidth = WidthB; + MulType = TyB; + } else { + MulWidth = WidthA; + MulType = TyA; + } + + // In order to replace the original mul with a narrower mul.with.overflow, + // all uses must ignore upper bits of the product. The number of used low + // bits must be not greater than the width of mul.with.overflow. + if (MulVal->hasNUsesOrMore(2)) + for (User *U : MulVal->users()) { + if (U == &I) + continue; + if (TruncInst *TI = dyn_cast<TruncInst>(U)) { + // Check if truncation ignores bits above MulWidth. + unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits(); + if (TruncWidth > MulWidth) + return nullptr; + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { + // Check if AND ignores bits above MulWidth. + if (BO->getOpcode() != Instruction::And) + return nullptr; + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { + const APInt &CVal = CI->getValue(); + if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth) + return nullptr; + } + } else { + // Other uses prohibit this transformation. + return nullptr; + } + } + + // Recognize patterns + switch (I.getPredicate()) { + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp eq/neq mulval, zext trunc mulval + if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal)) + if (Zext->hasOneUse()) { + Value *ZextArg = Zext->getOperand(0); + if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg)) + if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth) + break; //Recognized + } + + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits. + ConstantInt *CI; + Value *ValToMask; + if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) { + if (ValToMask != MulVal) + return nullptr; + const APInt &CVal = CI->getValue() + 1; + if (CVal.isPowerOf2()) { + unsigned MaskWidth = CVal.logBase2(); + if (MaskWidth == MulWidth) + break; // Recognized + } + } + return nullptr; + + case ICmpInst::ICMP_UGT: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp ugt mulval, max + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { + APInt MaxVal = APInt::getMaxValue(MulWidth); + MaxVal = MaxVal.zext(CI->getBitWidth()); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + case ICmpInst::ICMP_UGE: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp uge mulval, max+1 + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { + APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + case ICmpInst::ICMP_ULE: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp ule mulval, max + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { + APInt MaxVal = APInt::getMaxValue(MulWidth); + MaxVal = MaxVal.zext(CI->getBitWidth()); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + case ICmpInst::ICMP_ULT: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp ule mulval, max + 1 + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { + APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + default: + return nullptr; + } + + InstCombiner::BuilderTy *Builder = IC.Builder; + Builder->SetInsertPoint(MulInstr); + Module *M = I.getParent()->getParent()->getParent(); + + // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) + Value *MulA = A, *MulB = B; + if (WidthA < MulWidth) + MulA = Builder->CreateZExt(A, MulType); + if (WidthB < MulWidth) + MulB = Builder->CreateZExt(B, MulType); + Value *F = + Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType); + CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul"); + IC.Worklist.Add(MulInstr); + + // If there are uses of mul result other than the comparison, we know that + // they are truncation or binary AND. Change them to use result of + // mul.with.overflow and adjust properly mask/size. + if (MulVal->hasNUsesOrMore(2)) { + Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value"); + for (User *U : MulVal->users()) { + if (U == &I || U == OtherVal) + continue; + if (TruncInst *TI = dyn_cast<TruncInst>(U)) { + if (TI->getType()->getPrimitiveSizeInBits() == MulWidth) + IC.ReplaceInstUsesWith(*TI, Mul); + else + TI->setOperand(0, Mul); + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { + assert(BO->getOpcode() == Instruction::And); + // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + APInt ShortMask = CI->getValue().trunc(MulWidth); + Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask); + Instruction *Zext = + cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType())); + IC.Worklist.Add(Zext); + IC.ReplaceInstUsesWith(*BO, Zext); + } else { + llvm_unreachable("Unexpected Binary operation"); + } + IC.Worklist.Add(cast<Instruction>(U)); + } + } + if (isa<Instruction>(OtherVal)) + IC.Worklist.Add(cast<Instruction>(OtherVal)); + + // The original icmp gets replaced with the overflow value, maybe inverted + // depending on predicate. + bool Inverse = false; + switch (I.getPredicate()) { + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_EQ: + Inverse = true; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + if (I.getOperand(0) == MulVal) + break; + Inverse = true; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (I.getOperand(1) == MulVal) + break; + Inverse = true; + break; + default: + llvm_unreachable("Unexpected predicate"); + } + if (Inverse) { + Value *Res = Builder->CreateExtractValue(Call, 1); + return BinaryOperator::CreateNot(Res); + } + + return ExtractValueInst::Create(Call, 1); +} + // DemandedBitsLHSMask - When performing a comparison against a constant, // it is possible that not all the bits in the LHS are demanded. This helper // method computes the mask that IS demanded. @@ -2178,7 +2410,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // See if we are doing a comparison with a constant. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; // Match the following pattern, which is a common idiom when writing // overflow-safe integer arithmetic function. The source performs an @@ -2293,15 +2525,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { APInt Op0KnownZeroInverted = ~Op0KnownZero; if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { // If the LHS is an AND with the same constant, look through it. - Value *LHS = 0; - ConstantInt *LHSC = 0; + Value *LHS = nullptr; + ConstantInt *LHSC = nullptr; if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) == 0" into "x != 3". - Value *X = 0; + Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); return new ICmpInst(ICmpInst::ICMP_NE, X, @@ -2330,15 +2562,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { APInt Op0KnownZeroInverted = ~Op0KnownZero; if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { // If the LHS is an AND with the same constant, look through it. - Value *LHS = 0; - ConstantInt *LHSC = 0; + Value *LHS = nullptr; + ConstantInt *LHSC = nullptr; if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) != 0" into "x == 3". - Value *X = 0; + Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); return new ICmpInst(ICmpInst::ICMP_EQ, X, @@ -2470,7 +2702,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin())) if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) - return 0; + return nullptr; // See if we are doing a comparison between a constant and an instruction that // can be folded into the comparison. @@ -2506,7 +2738,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If either operand of the select is a constant, we can fold the // comparison into the select arms, which will cause one to be // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; + Value *Op1 = nullptr, *Op2 = nullptr; if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) @@ -2618,7 +2850,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Analyze the case when either Op0 or Op1 is an add instruction. // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). - Value *A = 0, *B = 0, *C = 0, *D = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; if (BO0 && BO0->getOpcode() == Instruction::Add) A = BO0->getOperand(0), B = BO0->getOperand(1); if (BO1 && BO1->getOpcode() == Instruction::Add) @@ -2713,7 +2945,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Analyze the case when either Op0 or Op1 is a sub instruction. // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). - A = 0; B = 0; C = 0; D = 0; + A = nullptr; B = nullptr; C = nullptr; D = nullptr; if (BO0 && BO0->getOpcode() == Instruction::Sub) A = BO0->getOperand(0), B = BO0->getOperand(1); if (BO1 && BO1->getOpcode() == Instruction::Sub) @@ -2739,7 +2971,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BO0->hasOneUse() && BO1->hasOneUse()) return new ICmpInst(Pred, D, B); - BinaryOperator *SRem = NULL; + // icmp (0-X) < cst --> x > -cst + if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) { + Value *X; + if (match(BO0, m_Neg(m_Value(X)))) + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) + if (!RHSC->isMinValue(/*isSigned=*/true)) + return new ICmpInst(I.getSwappedPredicate(), X, + ConstantExpr::getNeg(RHSC)); + } + + BinaryOperator *SRem = nullptr; // icmp (srem X, Y), Y if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1)) @@ -2877,6 +3119,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { (Op0 == A || Op0 == B)) if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) return R; + + // (zext a) * (zext b) --> llvm.umul.with.overflow. + if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { + if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this)) + return R; + } + if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { + if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this)) + return R; + } } if (I.isEquality()) { @@ -2918,7 +3170,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { - Value *X = 0, *Y = 0, *Z = 0; + Value *X = nullptr, *Y = nullptr, *Z = nullptr; if (A == C) { X = B; Y = D; Z = A; @@ -3009,7 +3261,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate()); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } /// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. @@ -3017,13 +3269,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, Constant *RHSC) { - if (!isa<ConstantFP>(RHSC)) return 0; + if (!isa<ConstantFP>(RHSC)) return nullptr; const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); // Get the width of the mantissa. We don't want to hack on conversions that // might lose information from the integer, e.g. "i64 -> float" int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); - if (MantissaWidth == -1) return 0; // Unknown. + if (MantissaWidth == -1) return nullptr; // Unknown. // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. @@ -3037,7 +3289,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // If the conversion would lose info, don't hack on this. if ((int)InputSize > MantissaWidth) - return 0; + return nullptr; // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is @@ -3383,5 +3635,5 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), RHSExt->getOperand(0)); - return Changed ? &I : 0; + return Changed ? &I : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index dcc8b0f84e..66d09388f4 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -20,6 +20,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); @@ -29,10 +31,13 @@ STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); static bool pointsToConstantGlobal(Value *V) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return GV->isConstant(); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::AddrSpaceCast || CE->getOpcode() == Instruction::GetElementPtr) return pointsToConstantGlobal(CE->getOperand(0)); + } return false; } @@ -60,9 +65,9 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, continue; } - if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) { // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) + if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset)) return false; continue; } @@ -112,7 +117,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, // If this is isn't our memcpy/memmove, reject it as something we can't // handle. MemTransferInst *MI = dyn_cast<MemTransferInst>(I); - if (MI == 0) + if (!MI) return false; // If the transfer is using the alloca as a source of the transfer, then @@ -148,10 +153,10 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, static MemTransferInst * isOnlyCopiedFromConstantGlobal(AllocaInst *AI, SmallVectorImpl<Instruction *> &ToDelete) { - MemTransferInst *TheCopy = 0; + MemTransferInst *TheCopy = nullptr; if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) return TheCopy; - return 0; + return nullptr; } Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { @@ -172,7 +177,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); + AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of @@ -295,7 +300,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // If the address spaces don't match, don't eliminate the cast. if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) - return 0; + return nullptr; Type *SrcPTy = SrcTy->getElementType(); @@ -346,7 +351,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } } } - return 0; + return nullptr; } Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { @@ -373,7 +378,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. - if (!LI.isSimple()) return 0; + if (!LI.isSimple()) return nullptr; // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, @@ -455,7 +460,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } } - return 0; + return nullptr; } /// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P @@ -467,12 +472,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); - if (SrcTy == 0) return 0; + if (!SrcTy) return nullptr; Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) - return 0; + return nullptr; /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" /// to its first element. This allows us to handle things like: @@ -506,20 +511,20 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { } if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) - return 0; + return nullptr; // If the pointers point into different address spaces don't do the // transformation. if (SrcTy->getAddressSpace() != cast<PointerType>(CI->getType())->getAddressSpace()) - return 0; + return nullptr; // If the pointers point to values of different sizes don't do the // transformation. if (!IC.getDataLayout() || IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != IC.getDataLayout()->getTypeSizeInBits(DestPTy)) - return 0; + return nullptr; // If the pointers point to pointers to different address spaces don't do the // transformation. It is not safe to introduce an addrspacecast instruction in @@ -527,7 +532,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // cast. if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() && SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace()) - return 0; + return nullptr; // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before @@ -607,7 +612,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Don't hack volatile/atomic stores. // FIXME: Some bits are legal for atomic stores; needs refactoring. - if (!SI.isSimple()) return 0; + if (!SI.isSimple()) return nullptr; // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. @@ -674,7 +679,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (Instruction *U = dyn_cast<Instruction>(Val)) Worklist.Add(U); // Dropped a use. } - return 0; // Do not modify these! + return nullptr; // Do not modify these! } // store undef, Ptr -> noop @@ -703,9 +708,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) - return 0; // xform done! + return nullptr; // xform done! - return 0; + return nullptr; } /// SimplifyStoreAtEndOfBlock - Turn things like: @@ -728,7 +733,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // the other predecessor. pred_iterator PI = pred_begin(DestBB); BasicBlock *P = *PI; - BasicBlock *OtherBB = 0; + BasicBlock *OtherBB = nullptr; if (P != StoreBB) OtherBB = P; @@ -758,7 +763,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. - StoreInst *OtherStore = 0; + StoreInst *OtherStore = nullptr; if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 71fbb6cda6..9996ebc2e7 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -19,6 +19,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// simplifyValueKnownNonZero - The specific integer value is used in a context /// where it is known to be non-zero. If this allows us to simplify the @@ -27,13 +29,13 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // If V has multiple uses, then we would have to do more analysis to determine // if this is safe. For example, the use could be in dynamically unreached // code. - if (!V->hasOneUse()) return 0; + if (!V->hasOneUse()) return nullptr; bool MadeChange = false; // ((1 << A) >>u B) --> (1 << (A-B)) // Because V cannot be zero, we know that B is less than A. - Value *A = 0, *B = 0, *PowerOf2 = 0; + Value *A = nullptr, *B = nullptr, *PowerOf2 = nullptr; if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))), m_Value(B))) && // The "1" can be any value known to be a power of 2. @@ -68,7 +70,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // If V is a phi node, we can call this on each of its operands. // "select cond, X, 0" can simplify to "X". - return MadeChange ? V : 0; + return MadeChange ? V : nullptr; } @@ -107,7 +109,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) { for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) { Constant *Elt = CV->getElementAsConstant(I); if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2()) - return 0; + return nullptr; Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2())); } @@ -118,6 +120,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyMulInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -139,7 +144,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2)); if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) { - Constant *NewCst = 0; + Constant *NewCst = nullptr; if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2()) // Replace X*(2^C) with X << C, where C is either a scalar or a splat. NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2()); @@ -165,10 +170,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { const APInt & Val = CI->getValue(); const APInt &PosVal = Val.abs(); if (Val.isNegative() && PosVal.isPowerOf2()) { - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (Op0->hasOneUse()) { ConstantInt *C1; - Value *Sub = 0; + Value *Sub = nullptr; if (match(Op0, m_Sub(m_Value(Y), m_Value(X)))) Sub = Builder->CreateSub(X, Y, "suba"); else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1)))) @@ -268,7 +273,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // -2 is "-1 << 1" so it is all bits set except the low one. APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - Value *BoolCast = 0, *OtherOp = 0; + Value *BoolCast = nullptr, *OtherOp = nullptr; if (MaskedValueIsZero(Op0, Negative2)) BoolCast = Op0, OtherOp = Op1; else if (MaskedValueIsZero(Op1, Negative2)) @@ -281,7 +286,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } // @@ -384,7 +389,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C, Constant *C0 = dyn_cast<Constant>(Opnd0); Constant *C1 = dyn_cast<Constant>(Opnd1); - BinaryOperator *R = 0; + BinaryOperator *R = nullptr; // (X * C0) * C => X * (C0*C) if (FMulOrDiv->getOpcode() == Instruction::FMul) { @@ -426,6 +431,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (isa<Constant>(Op0)) std::swap(Op0, Op1); @@ -483,7 +491,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Value *M1 = ConstantExpr::getFMul(C1, C); Value *M0 = isNormalFp(cast<Constant>(M1)) ? foldFMulConst(cast<Instruction>(Opnd0), C, &I) : - 0; + nullptr; if (M0 && M1) { if (Swap && FAddSub->getOpcode() == Instruction::FSub) std::swap(M0, M1); @@ -503,8 +511,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // Under unsafe algebra do: // X * log2(0.5*Y) = X*log2(Y) - X if (I.hasUnsafeAlgebra()) { - Value *OpX = NULL; - Value *OpY = NULL; + Value *OpX = nullptr; + Value *OpY = nullptr; IntrinsicInst *Log2; detectLog2OfHalf(Op0, OpY, Log2); if (OpY) { @@ -567,7 +575,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Value *Opnd0_0, *Opnd0_1; if (Opnd0->hasOneUse() && match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) { - Value *Y = 0; + Value *Y = nullptr; if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1) Y = Opnd0_1; else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1) @@ -621,7 +629,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { break; } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } /// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select @@ -682,12 +690,12 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { // If we past the instruction, quit looking for it. if (&*BBI == SI) - SI = 0; + SI = nullptr; if (&*BBI == SelectCond) - SelectCond = 0; + SelectCond = nullptr; // If we ran out of things to eliminate, break out of the loop. - if (SelectCond == 0 && SI == 0) + if (!SelectCond && !SI) break; } @@ -719,7 +727,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) { if (MultiplyOverflows(RHS, LHSRHS, - I.getOpcode()==Instruction::SDiv)) + I.getOpcode() == Instruction::SDiv)) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), ConstantExpr::getMul(RHS, LHSRHS)); @@ -735,12 +743,31 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } + if (ConstantInt *One = dyn_cast<ConstantInt>(Op0)) { + if (One->isOne() && !I.getType()->isIntegerTy(1)) { + bool isSigned = I.getOpcode() == Instruction::SDiv; + if (isSigned) { + // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the + // result is one, if Op1 is -1 then the result is minus one, otherwise + // it's zero. + Value *Inc = Builder->CreateAdd(Op1, One); + Value *Cmp = Builder->CreateICmpULT( + Inc, ConstantInt::get(I.getType(), 3)); + return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); + } else { + // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the + // result is one, otherwise it's zero. + return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType()); + } + } + } + // See if we can fold away this div instruction. if (SimplifyDemandedInstructionBits(I)) return &I; // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y - Value *X = 0, *Z = 0; + Value *X = nullptr, *Z = nullptr; if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 bool isSigned = I.getOpcode() == Instruction::SDiv; if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) || @@ -748,7 +775,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return BinaryOperator::Create(I.getOpcode(), X, Op1); } - return 0; + return nullptr; } /// dyn_castZExtVal - Checks if V is a zext or constant that can @@ -761,7 +788,7 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) { if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth()) return ConstantExpr::getTrunc(C, Ty); } - return 0; + return nullptr; } namespace { @@ -786,7 +813,7 @@ struct UDivFoldAction { }; UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand) - : FoldAction(FA), OperandToFold(InputOperand), FoldResult(0) {} + : FoldAction(FA), OperandToFold(InputOperand), FoldResult(nullptr) {} UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS) : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {} }; @@ -865,7 +892,8 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) if (size_t LHSIdx = visitUDivOperand(Op0, SI->getOperand(1), I, Actions)) if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions)) { - Actions.push_back(UDivFoldAction((FoldUDivOperandCb)0, Op1, LHSIdx-1)); + Actions.push_back(UDivFoldAction((FoldUDivOperandCb)nullptr, Op1, + LHSIdx-1)); return Actions.size(); } @@ -875,6 +903,9 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyUDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -928,12 +959,15 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { return Inst; } - return 0; + return nullptr; } Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -983,7 +1017,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { } } - return 0; + return nullptr; } /// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special @@ -997,7 +1031,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Constant *Divisor, bool AllowReciprocal) { if (!isa<ConstantFP>(Divisor)) // TODO: handle vectors. - return 0; + return nullptr; const APFloat &FpVal = cast<ConstantFP>(Divisor)->getValueAPF(); APFloat Reciprocal(FpVal.getSemantics()); @@ -1010,7 +1044,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, } if (!Cvt) - return 0; + return nullptr; ConstantFP *R; R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal); @@ -1020,6 +1054,9 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1037,10 +1074,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return R; if (AllowReassociate) { - Constant *C1 = 0; + Constant *C1 = nullptr; Constant *C2 = Op1C; Value *X; - Instruction *Res = 0; + Instruction *Res = nullptr; if (match(Op0, m_FMul(m_Value(X), m_Constant(C1)))) { // (X*C1)/C2 => X * (C1/C2) @@ -1071,12 +1108,12 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return T; } - return 0; + return nullptr; } if (AllowReassociate && isa<Constant>(Op0)) { Constant *C1 = cast<Constant>(Op0), *C2; - Constant *Fold = 0; + Constant *Fold = nullptr; Value *X; bool CreateDiv = true; @@ -1098,13 +1135,13 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { R->setFastMathFlags(I.getFastMathFlags()); return R; } - return 0; + return nullptr; } if (AllowReassociate) { Value *X, *Y; - Value *NewInst = 0; - Instruction *SimpR = 0; + Value *NewInst = nullptr; + Instruction *SimpR = nullptr; if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) { // (X/Y) / Z => X / (Y*Z) @@ -1140,7 +1177,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { } } - return 0; + return nullptr; } /// This function implements the transforms common to both integer remainder @@ -1176,12 +1213,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyURemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1208,12 +1248,15 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { return ReplaceInstUsesWith(I, Ext); } - return 0; + return nullptr; } Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySRemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1250,7 +1293,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { bool hasMissing = false; for (unsigned i = 0; i != VWidth; ++i) { Constant *Elt = C->getAggregateElement(i); - if (Elt == 0) { + if (!Elt) { hasMissing = true; break; } @@ -1279,12 +1322,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitFRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFRemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1292,5 +1338,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) return &I; - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 0ab657a279..46f7b8a095 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -18,6 +18,8 @@ #include "llvm/IR/DataLayout.h" using namespace llvm; +#define DEBUG_TYPE "instcombine" + /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] /// and if a/b/c and the add's all have a single use, turn this into a phi /// and a single binop. @@ -48,12 +50,12 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { // types. I->getOperand(0)->getType() != LHSType || I->getOperand(1)->getType() != RHSType) - return 0; + return nullptr; // If they are CmpInst instructions, check their predicates if (CmpInst *CI = dyn_cast<CmpInst>(I)) if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate()) - return 0; + return nullptr; if (isNUW) isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); @@ -63,8 +65,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { isExact = cast<PossiblyExactOperator>(I)->isExact(); // Keep track of which operand needs a phi node. - if (I->getOperand(0) != LHSVal) LHSVal = 0; - if (I->getOperand(1) != RHSVal) RHSVal = 0; + if (I->getOperand(0) != LHSVal) LHSVal = nullptr; + if (I->getOperand(1) != RHSVal) RHSVal = nullptr; } // If both LHS and RHS would need a PHI, don't do this transformation, @@ -72,14 +74,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { // which leads to higher register pressure. This is especially // bad when the PHIs are in the header of a loop. if (!LHSVal && !RHSVal) - return 0; + return nullptr; // Otherwise, this is safe to transform! Value *InLHS = FirstInst->getOperand(0); Value *InRHS = FirstInst->getOperand(1); - PHINode *NewLHS = 0, *NewRHS = 0; - if (LHSVal == 0) { + PHINode *NewLHS = nullptr, *NewRHS = nullptr; + if (!LHSVal) { NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), FirstInst->getOperand(0)->getName() + ".pn"); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); @@ -87,7 +89,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { LHSVal = NewLHS; } - if (RHSVal == 0) { + if (!RHSVal) { NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); @@ -148,7 +150,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || GEP->getNumOperands() != FirstInst->getNumOperands()) - return 0; + return nullptr; AllInBounds &= GEP->isInBounds(); @@ -170,19 +172,19 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // for struct indices, which must always be constant. if (isa<ConstantInt>(FirstInst->getOperand(op)) || isa<ConstantInt>(GEP->getOperand(op))) - return 0; + return nullptr; if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) - return 0; + return nullptr; // If we already needed a PHI for an earlier operand, and another operand // also requires a PHI, we'd be introducing more PHIs than we're // eliminating, which increases register pressure on entry to the PHI's // block. if (NeededPhi) - return 0; + return nullptr; - FixedOperands[op] = 0; // Needs a PHI. + FixedOperands[op] = nullptr; // Needs a PHI. NeededPhi = true; } } @@ -194,7 +196,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // load up into the predecessors so that we have a load of a gep of an alloca, // which can usually all be folded into the load. if (AllBasePointersAreAllocas) - return 0; + return nullptr; // Otherwise, this is safe to transform. Insert PHI nodes for each operand // that is variable. @@ -288,7 +290,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // FIXME: This is overconservative; this transform is allowed in some cases // for atomic operations. if (FirstLI->isAtomic()) - return 0; + return nullptr; // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. We currently @@ -303,20 +305,20 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // load and the PHI. if (FirstLI->getParent() != PN.getIncomingBlock(0) || !isSafeAndProfitableToSinkLoad(FirstLI)) - return 0; + return nullptr; // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. if (isVolatile && FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); if (!LI || !LI->hasOneUse()) - return 0; + return nullptr; // We can't sink the load if the loaded value could be modified between // the load and the PHI. @@ -324,12 +326,12 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LI->getParent() != PN.getIncomingBlock(i) || LI->getPointerAddressSpace() != LoadAddrSpace || !isSafeAndProfitableToSinkLoad(LI)) - return 0; + return nullptr; // If some of the loads have an alignment specified but not all of them, // we can't do the transformation. if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) - return 0; + return nullptr; LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); @@ -338,7 +340,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // the path through the other successor. if (isVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; } // Okay, they are all the same operation. Create a new PHI node of the @@ -354,7 +356,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); if (NewInVal != InVal) - InVal = 0; + InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } @@ -398,8 +400,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // If all input operands to the phi are the same instruction (e.g. a cast from // the same type or "+42") we can pull the operation through the PHI, reducing // code size and simplifying code. - Constant *ConstantOp = 0; - Type *CastSrcTy = 0; + Constant *ConstantOp = nullptr; + Type *CastSrcTy = nullptr; bool isNUW = false, isNSW = false, isExact = false; if (isa<CastInst>(FirstInst)) { @@ -409,13 +411,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // the code by turning an i32 into an i1293. if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) { if (!ShouldChangeType(PN.getType(), CastSrcTy)) - return 0; + return nullptr; } } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { // Can fold binop, compare or shift here if the RHS is a constant, // otherwise call FoldPHIArgBinOpIntoPHI. ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); - if (ConstantOp == 0) + if (!ConstantOp) return FoldPHIArgBinOpIntoPHI(PN); if (OverflowingBinaryOperator *BO = @@ -426,19 +428,19 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { dyn_cast<PossiblyExactOperator>(FirstInst)) isExact = PEO->isExact(); } else { - return 0; // Cannot fold this operation. + return nullptr; // Cannot fold this operation. } // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); - if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) - return 0; + if (!I || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) + return nullptr; if (CastSrcTy) { if (I->getOperand(0)->getType() != CastSrcTy) - return 0; // Cast operation must match. + return nullptr; // Cast operation must match. } else if (I->getOperand(1) != ConstantOp) { - return 0; + return nullptr; } if (isNUW) @@ -462,7 +464,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0); if (NewInVal != InVal) - InVal = 0; + InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } @@ -587,10 +589,10 @@ namespace llvm { template<> struct DenseMapInfo<LoweredPHIRecord> { static inline LoweredPHIRecord getEmptyKey() { - return LoweredPHIRecord(0, 0); + return LoweredPHIRecord(nullptr, 0); } static inline LoweredPHIRecord getTombstoneKey() { - return LoweredPHIRecord(0, 1); + return LoweredPHIRecord(nullptr, 1); } static unsigned getHashValue(const LoweredPHIRecord &Val) { return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^ @@ -637,14 +639,14 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // bail out. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); - if (II == 0) continue; + if (!II) continue; if (II->getParent() != PN->getIncomingBlock(i)) continue; // If we have a phi, and if it's directly in the predecessor, then we have // a critical edge where we need to put the truncate. Since we can't // split the edge in instcombine, we have to bail out. - return 0; + return nullptr; } for (User *U : PN->users()) { @@ -667,7 +669,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if (UserI->getOpcode() != Instruction::LShr || !UserI->hasOneUse() || !isa<TruncInst>(UserI->user_back()) || !isa<ConstantInt>(UserI->getOperand(1))) - return 0; + return nullptr; unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue(); PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back())); @@ -705,7 +707,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // If we've already lowered a user like this, reuse the previously lowered // value. - if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { + if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == nullptr) { // Otherwise, Create the new PHI node for this user. EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), @@ -894,5 +896,5 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index e74d912216..9a41e4b940 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -18,16 +18,18 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, /// returning the kind and providing the out parameter results if we /// successfully match. static SelectPatternFlavor MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); - if (SI == 0) return SPF_UNKNOWN; + if (!SI) return SPF_UNKNOWN; ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); - if (ICI == 0) return SPF_UNKNOWN; + if (!ICI) return SPF_UNKNOWN; LHS = ICI->getOperand(0); RHS = ICI->getOperand(1); @@ -129,15 +131,15 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, if (TI->isCast()) { Type *FIOpndTy = FI->getOperand(0)->getType(); if (TI->getOperand(0)->getType() != FIOpndTy) - return 0; + return nullptr; // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() || CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())) - return 0; + return nullptr; } else { - return 0; // unknown unary op. + return nullptr; // unknown unary op. } // Fold this by inserting a select from the input values. @@ -149,7 +151,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, // Only handle binary operators here. if (!isa<BinaryOperator>(TI)) - return 0; + return nullptr; // Figure out if the operations have any operands in common. Value *MatchOp, *OtherOpT, *OtherOpF; @@ -165,7 +167,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, OtherOpF = FI->getOperand(0); MatchIsOpZero = false; } else if (!TI->isCommutative()) { - return 0; + return nullptr; } else if (TI->getOperand(0) == FI->getOperand(1)) { MatchOp = TI->getOperand(0); OtherOpT = TI->getOperand(1); @@ -177,7 +179,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, OtherOpF = FI->getOperand(1); MatchIsOpZero = true; } else { - return 0; + return nullptr; } // If we reach here, they do have operations in common. @@ -282,7 +284,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, } } - return 0; + return nullptr; } /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is @@ -296,7 +298,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, Instruction *I = dyn_cast<Instruction>(V); if (!I) - return 0; + return nullptr; // If this is a binary operator, try to simplify it with the replaced op. if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { @@ -347,7 +349,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, } } - return 0; + return nullptr; } /// foldSelectICmpAndOr - We want to turn: @@ -368,18 +370,18 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) - return 0; + return nullptr; Value *CmpLHS = IC->getOperand(0); Value *CmpRHS = IC->getOperand(1); if (!match(CmpRHS, m_Zero())) - return 0; + return nullptr; Value *X; const APInt *C1; if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1)))) - return 0; + return nullptr; const APInt *C2; bool OrOnTrueVal = false; @@ -388,7 +390,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); if (!OrOnFalseVal && !OrOnTrueVal) - return 0; + return nullptr; Value *V = CmpLHS; Value *Y = OrOnFalseVal ? TrueVal : FalseVal; @@ -527,7 +529,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { - ConstantInt *C1 = NULL, *C2 = NULL; + ConstantInt *C1 = nullptr, *C2 = nullptr; if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) { C1 = dyn_cast<ConstantInt>(TrueVal); C2 = dyn_cast<ConstantInt>(FalseVal); @@ -586,7 +588,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) return ReplaceInstUsesWith(SI, V); - return Changed ? &SI : 0; + return Changed ? &SI : nullptr; } @@ -606,7 +608,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, // If the value is a non-instruction value like a constant or argument, it // can always be mapped. const Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return true; + if (!I) return true; // If V is a PHI node defined in the same block as the condition PHI, we can // map the arguments. @@ -649,11 +651,35 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, return ReplaceInstUsesWith(Outer, C); } - // TODO: MIN(MIN(A, 23), 97) - return 0; + if (SPF1 == SPF2) { + if (ConstantInt *CB = dyn_cast<ConstantInt>(B)) { + if (ConstantInt *CC = dyn_cast<ConstantInt>(C)) { + APInt ACB = CB->getValue(); + APInt ACC = CC->getValue(); + + // MIN(MIN(A, 23), 97) -> MIN(A, 23) + // MAX(MAX(A, 97), 23) -> MAX(A, 97) + if ((SPF1 == SPF_UMIN && ACB.ule(ACC)) || + (SPF1 == SPF_SMIN && ACB.sle(ACC)) || + (SPF1 == SPF_UMAX && ACB.uge(ACC)) || + (SPF1 == SPF_SMAX && ACB.sge(ACC))) + return ReplaceInstUsesWith(Outer, Inner); + + // MIN(MIN(A, 97), 23) -> MIN(A, 23) + // MAX(MAX(A, 23), 97) -> MAX(A, 97) + if ((SPF1 == SPF_UMIN && ACB.ugt(ACC)) || + (SPF1 == SPF_SMIN && ACB.sgt(ACC)) || + (SPF1 == SPF_UMAX && ACB.ult(ACC)) || + (SPF1 == SPF_SMAX && ACB.slt(ACC))) { + Outer.replaceUsesOfWith(Inner, A); + return &Outer; + } + } + } + } + return nullptr; } - /// foldSelectICmpAnd - If one of the constants is zero (we know they can't /// both be) and we have an icmp instruction with zero, and we have an 'and' /// with the non-constant value and a power of two we can turn the select @@ -663,27 +689,27 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) - return 0; + return nullptr; if (!match(IC->getOperand(1), m_Zero())) - return 0; + return nullptr; ConstantInt *AndRHS; Value *LHS = IC->getOperand(0); if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS)))) - return 0; + return nullptr; // If both select arms are non-zero see if we have a select of the form // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic // for 'x ? 2^n : 0' and fix the thing up at the end. - ConstantInt *Offset = 0; + ConstantInt *Offset = nullptr; if (!TrueVal->isZero() && !FalseVal->isZero()) { if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) Offset = FalseVal; else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) Offset = TrueVal; else - return 0; + return nullptr; // Adjust TrueVal and FalseVal to the offset. TrueVal = ConstantInt::get(Builder->getContext(), @@ -696,7 +722,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, if (!AndRHS->getValue().isPowerOf2() || (!TrueVal->getValue().isPowerOf2() && !FalseVal->getValue().isPowerOf2())) - return 0; + return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. @@ -708,7 +734,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, // or a trunc of the 'and'. The trunc case requires that all of the truncated // bits are zero, we can figure that out by looking at the 'and' mask. if (AndZeros >= ValC->getBitWidth()) - return 0; + return nullptr; Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType()); if (ValZeros > AndZeros) @@ -866,7 +892,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *TI = dyn_cast<Instruction>(TrueVal)) if (Instruction *FI = dyn_cast<Instruction>(FalseVal)) if (TI->hasOneUse() && FI->hasOneUse()) { - Instruction *AddOp = 0, *SubOp = 0; + Instruction *AddOp = nullptr, *SubOp = nullptr; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) if (TI->getOpcode() == FI->getOpcode()) @@ -888,7 +914,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (AddOp) { - Value *OtherAddOp = 0; + Value *OtherAddOp = nullptr; if (SubOp->getOperand(0) == AddOp->getOperand(0)) { OtherAddOp = AddOp->getOperand(1); } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { @@ -969,7 +995,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { if (TrueSI->getCondition() == CondVal) { if (SI.getTrueValue() == TrueSI->getTrueValue()) - return 0; + return nullptr; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } @@ -977,7 +1003,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { if (FalseSI->getCondition() == CondVal) { if (SI.getFalseValue() == FalseSI->getFalseValue()) - return 0; + return nullptr; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } @@ -1005,5 +1031,5 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } } - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 8273dfd488..cc6665c947 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -19,6 +19,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -33,7 +35,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; - if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) + if (Constant *CUI = dyn_cast<Constant>(Op1)) if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; @@ -50,7 +52,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { return &I; } - return 0; + return nullptr; } /// CanEvaluateShifted - See if we can compute the specified value, but shifted @@ -78,7 +80,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // if the needed bits are already zero in the input. This allows us to reuse // the value which means that we don't care if the shift has multiple uses. // TODO: Handle opposite shift by exact value. - ConstantInt *CI = 0; + ConstantInt *CI = nullptr; if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) || (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) { if (CI->getZExtValue() == NumBits) { @@ -115,7 +117,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::Shl: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast<ConstantInt>(I->getOperand(1)); - if (CI == 0) return false; + if (!CI) return false; // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). if (isLeftShift) return true; @@ -139,7 +141,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::LShr: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast<ConstantInt>(I->getOperand(1)); - if (CI == 0) return false; + if (!CI) return false; // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) return true; @@ -309,37 +311,38 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, -Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, +Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; + ConstantInt *COp1 = nullptr; + if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(Op1)) + COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()); + else if (ConstantVector *CV = dyn_cast<ConstantVector>(Op1)) + COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()); + else + COp1 = dyn_cast<ConstantInt>(Op1); + + if (!COp1) + return nullptr; // See if we can propagate this shift into the input, this covers the trivial // cast of lshr(shl(x,c1),c2) as well as other more complex cases. if (I.getOpcode() != Instruction::AShr && - CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { + CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this)) { DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); return ReplaceInstUsesWith(I, - GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); + GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this)); } - // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate - // a signed shift. - // - if (Op1->uge(TypeBits)) { - if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); - // ashr i32 X, 32 --> ashr i32 X, 31 - I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); - return &I; - } + assert(!COp1->uge(TypeBits) && + "Shift over the type width should have been removed already"); // ((X*C1) << C2) == (X * (C1 << C2)) if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) @@ -367,7 +370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa<ConstantInt>(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. - Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + Constant *ShAmt = ConstantExpr::getZExt(COp1, TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); @@ -384,10 +387,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // shift. We know that it is a logical shift by a constant, so adjust the // mask as appropriate. if (I.getOpcode() == Instruction::Shl) - MaskV <<= Op1->getZExtValue(); + MaskV <<= COp1->getZExtValue(); else { assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); - MaskV = MaskV.lshr(Op1->getZExtValue()); + MaskV = MaskV.lshr(COp1->getZExtValue()); } // shift1 & 0x00FF @@ -421,9 +424,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X + (Y << C)) Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, Op0BO->getOperand(1)->getName()); - uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), - APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + uint32_t Op1Val = COp1->getLimitedValue(TypeBits); + + APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); + Constant *Mask = ConstantInt::get(I.getContext(), Bits); + if (VectorType *VT = dyn_cast<VectorType>(X->getType())) + Mask = ConstantVector::getSplat(VT->getNumElements(), Mask); + return BinaryOperator::CreateAnd(X, Mask); } // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) @@ -453,9 +460,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X + (Y << C)) Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, Op0BO->getOperand(0)->getName()); - uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), - APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + uint32_t Op1Val = COp1->getLimitedValue(TypeBits); + + APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); + Constant *Mask = ConstantInt::get(I.getContext(), Bits); + if (VectorType *VT = dyn_cast<VectorType>(X->getType())) + Mask = ConstantVector::getSplat(VT->getNumElements(), Mask); + return BinaryOperator::CreateAnd(X, Mask); } // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) @@ -523,7 +534,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // Find out if this is a shift of a shift by a constant. BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); if (ShiftOp && !ShiftOp->isShift()) - ShiftOp = 0; + ShiftOp = nullptr; if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { @@ -541,9 +552,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); - uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); + uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits); assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); - if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. + if (ShiftAmt1 == 0) return nullptr; // Will be simplified in the future. Value *X = ShiftOp->getOperand(0); IntegerType *Ty = cast<IntegerType>(I.getType()); @@ -671,10 +682,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } } } - return 0; + return nullptr; } Instruction *InstCombiner::visitShl(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) @@ -709,10 +723,13 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { match(I.getOperand(1), m_Constant(C2))) return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); - return 0; + return nullptr; } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), DL)) return ReplaceInstUsesWith(I, V); @@ -749,10 +766,13 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), DL)) return ReplaceInstUsesWith(I, V); @@ -805,6 +825,5 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return ReplaceInstUsesWith(I, Op0); - return 0; + return nullptr; } - diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index a47b709b17..1b42d3d504 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #include "InstCombine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" @@ -21,6 +20,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "instcombine" + /// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if there /// are any bits set in the constant that are not demanded. If so, shrink the @@ -57,7 +58,7 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne, 0); - if (V == 0) return false; + if (!V) return false; if (V == &Inst) return true; ReplaceInstUsesWith(Inst, V); return true; @@ -71,7 +72,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, unsigned Depth) { Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth); - if (NewVal == 0) return false; + if (!NewVal) return false; U = NewVal; return true; } @@ -101,7 +102,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth) { - assert(V != 0 && "Null pointer of Value???"); + assert(V != nullptr && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); Type *VTy = V->getType(); @@ -118,33 +119,33 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // We know all of the bits for a constant! KnownOne = CI->getValue() & DemandedMask; KnownZero = ~KnownOne & DemandedMask; - return 0; + return nullptr; } if (isa<ConstantPointerNull>(V)) { // We know all of the bits for a constant! KnownOne.clearAllBits(); KnownZero = DemandedMask; - return 0; + return nullptr; } KnownZero.clearAllBits(); KnownOne.clearAllBits(); if (DemandedMask == 0) { // Not demanding any bits from V. if (isa<UndefValue>(V)) - return 0; + return nullptr; return UndefValue::get(VTy); } if (Depth == 6) // Limit search depth. - return 0; + return nullptr; APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); Instruction *I = dyn_cast<Instruction>(V); if (!I) { - ComputeMaskedBits(V, KnownZero, KnownOne, Depth); - return 0; // Only analyze instructions. + computeKnownBits(V, KnownZero, KnownOne, Depth); + return nullptr; // Only analyze instructions. } // If there are multiple uses of this value and we aren't at the root, then @@ -157,8 +158,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // this instruction has a simpler value in that context. if (I->getOpcode() == Instruction::And) { // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this @@ -179,8 +180,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // only bits from X or Y are demanded. // If either the LHS or the RHS are One, the result is One. - ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this @@ -204,8 +205,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // We can simplify (X^Y) -> X or Y in the user's context if we know that // only bits from X or Y are demanded. - ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known zero on one side, return the // other. @@ -216,8 +217,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Compute the KnownZero/KnownOne bits to simplify things downstream. - ComputeMaskedBits(I, KnownZero, KnownOne, Depth); - return 0; + computeKnownBits(I, KnownZero, KnownOne, Depth); + return nullptr; } // If this is the root being simplified, allow it to have multiple uses, @@ -229,7 +230,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - ComputeMaskedBits(I, KnownZero, KnownOne, Depth); + computeKnownBits(I, KnownZero, KnownOne, Depth); break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. @@ -409,20 +410,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } case Instruction::BitCast: if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) - return 0; // vector->int or fp->int? + return nullptr; // vector->int or fp->int? if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { if (VectorType *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { if (DstVTy->getNumElements() != SrcVTy->getNumElements()) // Don't touch a bitcast between vectors of different element counts. - return 0; + return nullptr; } else // Don't touch a scalar-to-vector bitcast. - return 0; + return nullptr; } else if (I->getOperand(0)->getType()->isVectorTy()) // Don't touch a vector-to-scalar bitcast. - return 0; + return nullptr; if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) @@ -578,9 +579,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; } - // Otherwise just hand the sub off to ComputeMaskedBits to fill in + // Otherwise just hand the sub off to computeKnownBits to fill in // the known zeros and ones. - ComputeMaskedBits(V, KnownZero, KnownOne, Depth); + computeKnownBits(V, KnownZero, KnownOne, Depth); // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known // zero. @@ -751,7 +752,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // remainder is zero. if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) KnownZero.setBit(KnownZero.getBitWidth() - 1); @@ -810,10 +811,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } case Intrinsic::x86_sse42_crc32_64_64: KnownZero = APInt::getHighBitsSet(64, 32); - return 0; + return nullptr; } } - ComputeMaskedBits(V, KnownZero, KnownOne, Depth); + computeKnownBits(V, KnownZero, KnownOne, Depth); break; } @@ -821,7 +822,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // constant. if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) return Constant::getIntegerValue(VTy, KnownOne); - return 0; + return nullptr; } /// Helper routine of SimplifyDemandedUseBits. It tries to simplify @@ -847,13 +848,13 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue(); const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue(); if (!ShlOp1 || !ShrOp1) - return 0; // Noop. + return nullptr; // Noop. Value *VarX = Shr->getOperand(0); Type *Ty = VarX->getType(); unsigned BitWidth = Ty->getIntegerBitWidth(); if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) - return 0; // Undef. + return nullptr; // Undef. unsigned ShlAmt = ShlOp1.getZExtValue(); unsigned ShrAmt = ShrOp1.getZExtValue(); @@ -882,7 +883,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, return VarX; if (!Shr->hasOneUse()) - return 0; + return nullptr; BinaryOperator *New; if (ShrAmt < ShlAmt) { @@ -902,7 +903,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, return InsertNewInstWith(New, *Shl); } - return 0; + return nullptr; } /// SimplifyDemandedVectorElts - The specified value produces a vector with @@ -923,7 +924,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (isa<UndefValue>(V)) { // If the entire vector is undefined, just return this info. UndefElts = EltMask; - return 0; + return nullptr; } if (DemandedElts == 0) { // If nothing is demanded, provide undef. @@ -938,7 +939,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // Check if this is identity. If so, return 0 since we are not simplifying // anything. if (DemandedElts.isAllOnesValue()) - return 0; + return nullptr; Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); @@ -952,7 +953,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } Constant *Elt = C->getAggregateElement(i); - if (Elt == 0) return 0; + if (!Elt) return nullptr; if (isa<UndefValue>(Elt)) { // Already undef. Elts.push_back(Undef); @@ -964,12 +965,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If we changed the constant, return it. Constant *NewCV = ConstantVector::get(Elts); - return NewCV != C ? NewCV : 0; + return NewCV != C ? NewCV : nullptr; } // Limit search depth. if (Depth == 10) - return 0; + return nullptr; // If multiple users are using the root value, proceed with // simplification conservatively assuming that all elements @@ -980,14 +981,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // the main instcombine process. if (Depth != 0) // TODO: Just compute the UndefElts information recursively. - return 0; + return nullptr; // Conservatively assume that all elements are needed. DemandedElts = EltMask; } Instruction *I = dyn_cast<Instruction>(V); - if (!I) return 0; // Only analyze instructions. + if (!I) return nullptr; // Only analyze instructions. bool MadeChange = false; APInt UndefElts2(VWidth, 0); @@ -999,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If this is a variable index, we don't know which element it overwrites. // demand exactly the same input as we produce. ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2)); - if (Idx == 0) { + if (!Idx) { // Note that we can't propagate undef elt info, because we don't know // which elt is getting updated. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, @@ -1281,5 +1282,5 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; } } - return MadeChange ? I : 0; + return MadeChange ? I : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 521dc9cd2e..8c5e202b5c 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -17,6 +17,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// CheapToScalarize - Return true if the value is cheaper to scalarize than it /// is to leave as a vector operation. isConstant indicates whether we're /// extracting one known element. If false we're extracting a variable index. @@ -73,7 +75,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. if (!isa<ConstantInt>(III->getOperand(2))) - return 0; + return nullptr; unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); // If this is an insert to the element we are looking for, return the @@ -97,14 +99,14 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { } // Extract a value from a vector add operation with a constant zero. - Value *Val = 0; Constant *Con = 0; + Value *Val = nullptr; Constant *Con = nullptr; if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { if (Con->getAggregateElement(EltNo)->isNullValue()) return FindScalarElement(Val, EltNo); } // Otherwise, we don't know. - return 0; + return nullptr; } // If we have a PHI node with a vector type that has only 2 uses: feed @@ -113,7 +115,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. if (!PN->hasNUses(2)) - return NULL; + return nullptr; // If so, it's known at this point that one operand is PHI and the other is // an extractelement node. Find the PHI user that is not the extractelement @@ -128,7 +130,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // otherwise return NULL. if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true)) - return NULL; + return nullptr; // Create a scalar PHI node that will replace the vector PHI node // just before the current PHI node. @@ -318,7 +320,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } } } - return 0; + return nullptr; } /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns @@ -440,10 +442,10 @@ static ShuffleOps CollectShuffleElements(Value *V, // Either the extracted from or inserted into vector must be RHSVec, // otherwise we'd end up with a shuffle of three inputs. - if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) { + if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { Value *RHS = EI->getOperand(0); ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS); - assert(LR.second == 0 || LR.second == RHS); + assert(LR.second == nullptr || LR.second == RHS); if (LR.first->getType() != RHS->getType()) { // We tried our best, but we can't find anything compatible with RHS @@ -488,6 +490,41 @@ static ShuffleOps CollectShuffleElements(Value *V, return std::make_pair(V, nullptr); } +/// Try to find redundant insertvalue instructions, like the following ones: +/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 +/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 +/// Here the second instruction inserts values at the same indices, as the +/// first one, making the first one redundant. +/// It should be transformed to: +/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 +Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) { + bool IsRedundant = false; + ArrayRef<unsigned int> FirstIndices = I.getIndices(); + + // If there is a chain of insertvalue instructions (each of them except the + // last one has only one use and it's another insertvalue insn from this + // chain), check if any of the 'children' uses the same indices as the first + // instruction. In this case, the first one is redundant. + Value *V = &I; + unsigned Depth = 0; + while (V->hasOneUse() && Depth < 10) { + User *U = V->user_back(); + auto UserInsInst = dyn_cast<InsertValueInst>(U); + if (!UserInsInst || U->getOperand(0) != V) + break; + if (UserInsInst->getIndices() == FirstIndices) { + IsRedundant = true; + break; + } + V = UserInsInst; + Depth++; + } + + if (IsRedundant) + return ReplaceInstUsesWith(I, I.getOperand(0)); + return nullptr; +} + Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); @@ -523,13 +560,14 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) { SmallVector<Constant*, 16> Mask; - ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0); + ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr); // The proposed shuffle may be trivial, in which case we shouldn't // perform the combine. if (LR.first != &IE && LR.second != &IE) { // We now have a shuffle of LHS, RHS, Mask. - if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType()); + if (LR.second == nullptr) + LR.second = UndefValue::get(LR.first->getType()); return new ShuffleVectorInst(LR.first, LR.second, ConstantVector::get(Mask)); } @@ -546,7 +584,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { return &IE; } - return 0; + return nullptr; } /// Return true if we can evaluate the specified expression tree if the vector @@ -801,6 +839,20 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { llvm_unreachable("failed to reorder elements of vector instruction!"); } +static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask, + bool &isLHSID, bool &isRHSID) { + isLHSID = isRHSID = true; + + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) continue; // Ignore undef values. + // Is this an identity shuffle of the LHS value? + isLHSID &= (Mask[i] == (int)i); + + // Is this an identity shuffle of the RHS value? + isRHSID &= (Mask[i]-e == i); + } +} + Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -864,16 +916,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (VWidth == LHSWidth) { // Analyze the shuffle, are the LHS or RHS and identity shuffles? - bool isLHSID = true, isRHSID = true; - - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] < 0) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == (int)i); - - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); - } + bool isLHSID, isRHSID; + RecognizeIdentityMask(Mask, isLHSID, isRHSID); // Eliminate identity shuffles. if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); @@ -932,16 +976,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); if (LHSShuffle) if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) - LHSShuffle = NULL; + LHSShuffle = nullptr; if (RHSShuffle) if (!isa<UndefValue>(RHSShuffle->getOperand(1))) - RHSShuffle = NULL; + RHSShuffle = nullptr; if (!LHSShuffle && !RHSShuffle) - return MadeChange ? &SVI : 0; + return MadeChange ? &SVI : nullptr; - Value* LHSOp0 = NULL; - Value* LHSOp1 = NULL; - Value* RHSOp0 = NULL; + Value* LHSOp0 = nullptr; + Value* LHSOp1 = nullptr; + Value* RHSOp0 = nullptr; unsigned LHSOp0Width = 0; unsigned RHSOp0Width = 0; if (LHSShuffle) { @@ -973,11 +1017,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // case 4 if (LHSOp0 == RHSOp0) { newLHS = LHSOp0; - newRHS = NULL; + newRHS = nullptr; } if (newLHS == LHS && newRHS == RHS) - return MadeChange ? &SVI : 0; + return MadeChange ? &SVI : nullptr; SmallVector<int, 16> LHSMask; SmallVector<int, 16> RHSMask; @@ -1037,7 +1081,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // If newRHS == newLHS, we want to remap any references from newRHS to // newLHS so that we can properly identify splats that may occur due to // obfuscation across the two vectors. - if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS) + if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) eltMask += newLHSWidth; } @@ -1063,10 +1107,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); } } - if (newRHS == NULL) + if (!newRHS) newRHS = UndefValue::get(newLHS->getType()); return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); } - return MadeChange ? &SVI : 0; + // If the result mask is an identity, replace uses of this instruction with + // corresponding argument. + bool isLHSID, isRHSID; + RecognizeIdentityMask(newMask, isLHSID, isRHSID); + if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS); + if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS); + + return MadeChange ? &SVI : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 8c780b50ca..1ab7db3a98 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -10,7 +10,6 @@ #ifndef INSTCOMBINE_WORKLIST_H #define INSTCOMBINE_WORKLIST_H -#define DEBUG_TYPE "instcombine" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Instruction.h" @@ -18,6 +17,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#define DEBUG_TYPE "instcombine" + namespace llvm { /// InstCombineWorklist - This is the worklist management logic for @@ -68,7 +69,7 @@ public: if (It == WorklistMap.end()) return; // Not in worklist. // Don't bother moving everything down, just null out the slot. - Worklist[It->second] = 0; + Worklist[It->second] = nullptr; WorklistMap.erase(It); } @@ -101,4 +102,6 @@ public: } // end namespace llvm. +#undef DEBUG_TYPE + #endif diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 0cab81b1bb..4c36887f62 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -33,7 +33,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Scalar.h" #include "InstCombine.h" #include "llvm-c/Initialization.h" @@ -58,6 +57,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumCombined , "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); @@ -512,7 +513,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { } } - return 0; + return nullptr; } // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction @@ -530,7 +531,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { if (C->getType()->getElementType()->isIntegerTy()) return ConstantExpr::getNeg(C); - return 0; + return nullptr; } // dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the @@ -549,7 +550,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { if (C->getType()->getElementType()->isFloatingPointTy()) return ConstantExpr::getFNeg(C); - return 0; + return nullptr; } static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, @@ -595,13 +596,13 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, // not have a second operand. Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { // Don't modify shared select instructions - if (!SI->hasOneUse()) return 0; + if (!SI->hasOneUse()) return nullptr; Value *TV = SI->getOperand(1); Value *FV = SI->getOperand(2); if (isa<Constant>(TV) || isa<Constant>(FV)) { // Bool selects with constant operands can be folded to logical ops. - if (SI->getType()->isIntegerTy(1)) return 0; + if (SI->getType()->isIntegerTy(1)) return nullptr; // If it's a bitcast involving vectors, make sure it has the same number of // elements on both sides. @@ -610,10 +611,10 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); // Verify that either both or neither are vectors. - if ((SrcTy == NULL) != (DestTy == NULL)) return 0; + if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr; // If vectors, verify that they have the same number of elements. if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements()) - return 0; + return nullptr; } Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); @@ -622,7 +623,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { return SelectInst::Create(SI->getCondition(), SelectTrueVal, SelectFalseVal); } - return 0; + return nullptr; } @@ -634,7 +635,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *PN = cast<PHINode>(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); if (NumPHIValues == 0) - return 0; + return nullptr; // We normally only transform phis with a single use. However, if a PHI has // multiple uses and they are all the same operation, we can fold *all* of the @@ -644,7 +645,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { for (User *U : PN->users()) { Instruction *UI = cast<Instruction>(U); if (UI != &I && !I.isIdenticalTo(UI)) - return 0; + return nullptr; } // Otherwise, we can replace *all* users with the new PHI we form. } @@ -654,14 +655,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { // remember the BB it is in. If there is more than one or if *it* is a PHI, // bail out. We don't do arbitrary constant expressions here because moving // their computation can be expensive without a cost model. - BasicBlock *NonConstBB = 0; + BasicBlock *NonConstBB = nullptr; for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InVal = PN->getIncomingValue(i); if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal)) continue; - if (isa<PHINode>(InVal)) return 0; // Itself a phi. - if (NonConstBB) return 0; // More than one non-const value. + if (isa<PHINode>(InVal)) return nullptr; // Itself a phi. + if (NonConstBB) return nullptr; // More than one non-const value. NonConstBB = PN->getIncomingBlock(i); @@ -669,22 +670,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { // insert a computation after it without breaking the edge. if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) if (II->getParent() == NonConstBB) - return 0; + return nullptr; // If the incoming non-constant value is in I's block, we will remove one // instruction, but insert another equivalent one, leading to infinite // instcombine. if (NonConstBB == I.getParent()) - return 0; + return nullptr; } // If there is exactly one non-constant value, we can insert a copy of the // operation in that block. However, if this is a critical edge, we would be // inserting the computation one some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. - if (NonConstBB != 0) { + if (NonConstBB != nullptr) { BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); - if (!BI || !BI->isUnconditional()) return 0; + if (!BI || !BI->isUnconditional()) return nullptr; } // Okay, we can do the transformation: create the new PHI node. @@ -708,7 +709,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { BasicBlock *ThisBB = PN->getIncomingBlock(i); Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); - Value *InV = 0; + Value *InV = nullptr; // Beware of ConstantExpr: it may eventually evaluate to getNullValue, // even if currently isNullValue gives false. Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)); @@ -722,7 +723,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) { Constant *C = cast<Constant>(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = 0; + Value *InV = nullptr; if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else if (isa<ICmpInst>(CI)) @@ -736,7 +737,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } else if (I.getNumOperands() == 2) { Constant *C = cast<Constant>(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = 0; + Value *InV = nullptr; if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::get(I.getOpcode(), InC, C); else @@ -776,11 +777,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, assert(PtrTy->isPtrOrPtrVectorTy()); if (!DL) - return 0; + return nullptr; Type *Ty = PtrTy->getPointerElementType(); if (!Ty->isSized()) - return 0; + return nullptr; // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type @@ -806,7 +807,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, while (Offset) { // Indexing into tail padding between struct/array elements. if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty)) - return 0; + return nullptr; if (StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = DL->getStructLayout(STy); @@ -827,7 +828,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, Ty = AT->getElementType(); } else { // Otherwise, we can't index into the middle of this atomic type, bail. - return 0; + return nullptr; } } @@ -859,7 +860,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // If Scale is zero then it does not divide Val. if (Scale.isMinValue()) - return 0; + return nullptr; // Look through chains of multiplications, searching for a constant that is // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4 @@ -902,7 +903,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder); if (!Remainder.isMinValue()) // Not divisible by Scale. - return 0; + return nullptr; // Replace with the quotient in the parent. Op = ConstantInt::get(CI->getType(), Quotient); NoSignedWrap = true; @@ -915,7 +916,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Multiplication. NoSignedWrap = BO->hasNoSignedWrap(); if (RequireNoSignedWrap && !NoSignedWrap) - return 0; + return nullptr; // There are three cases for multiplication: multiplication by exactly // the scale, multiplication by a constant different to the scale, and @@ -934,7 +935,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Otherwise drill down into the constant. if (!Op->hasOneUse()) - return 0; + return nullptr; Parent = std::make_pair(BO, 1); continue; @@ -943,7 +944,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Multiplication by something else. Drill down into the left-hand side // since that's where the reassociate pass puts the good stuff. if (!Op->hasOneUse()) - return 0; + return nullptr; Parent = std::make_pair(BO, 0); continue; @@ -954,7 +955,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Multiplication by a power of 2. NoSignedWrap = BO->hasNoSignedWrap(); if (RequireNoSignedWrap && !NoSignedWrap) - return 0; + return nullptr; Value *LHS = BO->getOperand(0); int32_t Amt = cast<ConstantInt>(BO->getOperand(1))-> @@ -968,7 +969,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { break; } if (Amt < logScale || !Op->hasOneUse()) - return 0; + return nullptr; // Multiplication by more than the scale. Reduce the multiplying amount // by the scale in the parent. @@ -979,7 +980,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } if (!Op->hasOneUse()) - return 0; + return nullptr; if (CastInst *Cast = dyn_cast<CastInst>(Op)) { if (Cast->getOpcode() == Instruction::SExt) { @@ -993,7 +994,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Scale and the multiplication Y * SmallScale should not overflow. if (SmallScale.sext(Scale.getBitWidth()) != Scale) // SmallScale does not sign-extend to Scale. - return 0; + return nullptr; assert(SmallScale.exactLogBase2() == logScale); // Require that Y * SmallScale must not overflow. RequireNoSignedWrap = true; @@ -1012,7 +1013,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // trunc (Y * sext Scale) does not, so nsw flags need to be cleared // from this point up in the expression (see later). if (RequireNoSignedWrap) - return 0; + return nullptr; // Drill down through the cast. unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); @@ -1026,7 +1027,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } // Unsupported expression, bail out. - return 0; + return nullptr; } // We know that we can successfully descale, so from here on we can safely @@ -1082,6 +1083,101 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } while (1); } +/// \brief Creates node of binary operation with the same attributes as the +/// specified one but with other operands. +static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, + InstCombiner::BuilderTy *B) { + Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); + if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) { + if (isa<OverflowingBinaryOperator>(NewBO)) { + NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap()); + NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap()); + } + if (isa<PossiblyExactOperator>(NewBO)) + NewBO->setIsExact(Inst.isExact()); + } + return BORes; +} + +/// \brief Makes transformation of binary operation specific for vector types. +/// \param Inst Binary operator to transform. +/// \return Pointer to node that must replace the original binary operator, or +/// null pointer if no transformation was made. +Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { + if (!Inst.getType()->isVectorTy()) return nullptr; + + unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements(); + Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); + assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth); + assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth); + + // If both arguments of binary operation are shuffles, which use the same + // mask and shuffle within a single vector, it is worthwhile to move the + // shuffle after binary operation: + // Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m) + if (isa<ShuffleVectorInst>(LHS) && isa<ShuffleVectorInst>(RHS)) { + ShuffleVectorInst *LShuf = cast<ShuffleVectorInst>(LHS); + ShuffleVectorInst *RShuf = cast<ShuffleVectorInst>(RHS); + if (isa<UndefValue>(LShuf->getOperand(1)) && + isa<UndefValue>(RShuf->getOperand(1)) && + LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType() && + LShuf->getMask() == RShuf->getMask()) { + Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), + RShuf->getOperand(0), Builder); + Value *Res = Builder->CreateShuffleVector(NewBO, + UndefValue::get(NewBO->getType()), LShuf->getMask()); + return Res; + } + } + + // If one argument is a shuffle within one vector, the other is a constant, + // try moving the shuffle after the binary operation. + ShuffleVectorInst *Shuffle = nullptr; + Constant *C1 = nullptr; + if (isa<ShuffleVectorInst>(LHS)) Shuffle = cast<ShuffleVectorInst>(LHS); + if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS); + if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS); + if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS); + if (Shuffle && C1 && isa<UndefValue>(Shuffle->getOperand(1)) && + Shuffle->getType() == Shuffle->getOperand(0)->getType()) { + SmallVector<int, 16> ShMask = Shuffle->getShuffleMask(); + // Find constant C2 that has property: + // shuffle(C2, ShMask) = C1 + // If such constant does not exist (example: ShMask=<0,0> and C1=<1,2>) + // reorder is not possible. + SmallVector<Constant*, 16> C2M(VWidth, + UndefValue::get(C1->getType()->getScalarType())); + bool MayChange = true; + for (unsigned I = 0; I < VWidth; ++I) { + if (ShMask[I] >= 0) { + assert(ShMask[I] < (int)VWidth); + if (!isa<UndefValue>(C2M[ShMask[I]])) { + MayChange = false; + break; + } + C2M[ShMask[I]] = C1->getAggregateElement(I); + } + } + if (MayChange) { + Constant *C2 = ConstantVector::get(C2M); + Value *NewLHS, *NewRHS; + if (isa<Constant>(LHS)) { + NewLHS = C2; + NewRHS = Shuffle->getOperand(0); + } else { + NewLHS = Shuffle->getOperand(0); + NewRHS = C2; + } + Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); + Value *Res = Builder->CreateShuffleVector(NewBO, + UndefValue::get(Inst.getType()), Shuffle->getMask()); + return Res; + } + } + + return nullptr; +} + Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); @@ -1130,7 +1226,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) - return 0; + return nullptr; // Note that if our source is a gep chain itself then we wait for that // chain to be resolved before we perform this transformation. This @@ -1138,7 +1234,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (GEPOperator *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0))) if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) - return 0; // Wait until our source is folded to completion. + return nullptr; // Wait until our source is folded to completion. SmallVector<Value*, 8> Indices; @@ -1166,7 +1262,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // intptr_t). Just avoid transforming this until the input has been // normalized. if (SO1->getType() != GO1->getType()) - return 0; + return nullptr; Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); } @@ -1216,7 +1312,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // We do not handle pointer-vector geps here. if (!StrippedPtrTy) - return 0; + return nullptr; if (StrippedPtr != PtrOp) { bool HasZeroPointerIndex = false; @@ -1241,7 +1337,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst *Res = GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); - return Res; + if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) + return Res; + // Insert Res, and create an addrspacecast. + // e.g., + // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ... + // -> + // %0 = GEP i8 addrspace(1)* X, ... + // addrspacecast i8 addrspace(1)* %0 to i8* + return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType()); } if (ArrayType *XATy = @@ -1253,8 +1357,24 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // to an array of the same type as the destination pointer // array. Because the array type is never stepped over (there // is a leading zero) we can fold the cast into this GEP. - GEP.setOperand(0, StrippedPtr); - return &GEP; + if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) { + GEP.setOperand(0, StrippedPtr); + return &GEP; + } + // Cannot replace the base pointer directly because StrippedPtr's + // address space is different. Instead, create a new GEP followed by + // an addrspacecast. + // e.g., + // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*), + // i32 0, ... + // -> + // %0 = GEP [10 x i8] addrspace(1)* X, ... + // addrspacecast i8 addrspace(1)* %0 to i8* + SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); + Value *NewGEP = GEP.isInBounds() ? + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } } @@ -1360,7 +1480,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } if (!DL) - return 0; + return nullptr; /// See if we can simplify: /// X = bitcast A* to B* @@ -1412,7 +1532,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } - return 0; + return nullptr; } static bool @@ -1527,7 +1647,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { } return EraseInstFromFunction(MI); } - return 0; + return nullptr; } /// \brief Move the call to free before a NULL test. @@ -1556,30 +1676,30 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) { // would duplicate the call to free in each predecessor and it may // not be profitable even for code size. if (!PredBB) - return 0; + return nullptr; // Validate constraint #2: Does this block contains only the call to // free and an unconditional branch? // FIXME: We could check if we can speculate everything in the // predecessor block if (FreeInstrBB->size() != 2) - return 0; + return nullptr; BasicBlock *SuccBB; if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB))) - return 0; + return nullptr; // Validate the rest of constraint #1 by matching on the pred branch. TerminatorInst *TI = PredBB->getTerminator(); BasicBlock *TrueBB, *FalseBB; ICmpInst::Predicate Pred; if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB))) - return 0; + return nullptr; if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) - return 0; + return nullptr; // Validate constraint #3: Ensure the null case just falls through. if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) - return 0; + return nullptr; assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && "Broken CFG: missing edge from predecessor to successor"); @@ -1614,14 +1734,14 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { if (Instruction *I = tryToMoveFreeBeforeNullTest(FI)) return I; - return 0; + return nullptr; } Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Change br (not X), label True, label False to: br X, label False, True - Value *X = 0; + Value *X = nullptr; BasicBlock *TrueDest; BasicBlock *FalseDest; if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) && @@ -1664,7 +1784,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { return &BI; } - return 0; + return nullptr; } Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { @@ -1688,7 +1808,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { return &SI; } } - return 0; + return nullptr; } Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { @@ -1705,7 +1825,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // first index return ExtractValueInst::Create(C2, EV.getIndices().slice(1)); } - return 0; // Can't handle other constants + return nullptr; // Can't handle other constants } if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { @@ -1838,7 +1958,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // and if again single-use then via load (gep (gep)) to load (gep). // However, double extracts from e.g. function arguments or return values // aren't handled yet. - return 0; + return nullptr; } enum Personality_Type { @@ -2177,7 +2297,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { return &LI; } - return 0; + return nullptr; } @@ -2270,7 +2390,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e; ++i) { ConstantExpr *CE = dyn_cast<ConstantExpr>(i); - if (CE == 0) continue; + if (CE == nullptr) continue; Constant*& FoldRes = FoldedConstants[CE]; if (!FoldRes) @@ -2374,7 +2494,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { while (!Worklist.isEmpty()) { Instruction *I = Worklist.RemoveOne(); - if (I == 0) continue; // skip null values. + if (I == nullptr) continue; // skip null values. // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I, TLI)) { @@ -2516,7 +2636,7 @@ bool InstCombiner::runOnFunction(Function &F) { return false; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); // Minimizing size? MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -2543,7 +2663,7 @@ bool InstCombiner::runOnFunction(Function &F) { while (DoOneIteration(F, Iteration++)) EverMadeChange = true; - Builder = 0; + Builder = nullptr; return EverMadeChange; } |