| author | Stephen Hines <srhines@google.com> | 2014-04-23 16:57:46 -0700 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2014-04-24 15:53:16 -0700 |
| commit | 36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch) | |
| tree | e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /lib/CodeGen/SelectionDAG | |
| parent | 69a8640022b04415ae9fac62f8ab090601d8f889 (diff) | |
Update to LLVM 3.5a.
Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
27 files changed, 2046 insertions, 899 deletions
diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk index 3f28e08029..0e52ee3609 100644 --- a/lib/CodeGen/SelectionDAG/Android.mk +++ b/lib/CodeGen/SelectionDAG/Android.mk @@ -41,6 +41,7 @@ include $(BUILD_HOST_STATIC_LIBRARY) # For the device # ===================================================== +ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS)) include $(CLEAR_VARS) LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES) @@ -52,3 +53,4 @@ LOCAL_MODULE_TAGS := optional include $(LLVM_DEVICE_BUILD_MK) include $(LLVM_GEN_INTRINSICS_MK) include $(BUILD_STATIC_LIBRARY) +endif diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 43f72c5ef9..cc0c5fa076 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -50,11 +50,28 @@ STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> CombinerAA("combiner-alias-analysis", cl::Hidden, - cl::desc("Turn on alias analysis during testing")); + cl::desc("Enable DAG combiner alias-analysis heuristics")); static cl::opt<bool> CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, - cl::desc("Include global information in alias analysis")); + cl::desc("Enable DAG combiner's use of IR alias analysis")); + +// FIXME: Enable the use of TBAA. There are two known issues preventing this: +// 1. Stack coloring does not update TBAA when merging allocas +// 2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations. +// Because BasicAA does not handle inttoptr, we'll often miss basic type +// punning idioms that we need to catch so we don't miscompile real-world +// code. + static cl::opt<bool> + UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(false), + cl::desc("Enable DAG combiner's use of TBAA")); + +#ifndef NDEBUG + static cl::opt<std::string> + CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, + cl::desc("Only use DAG-combiner alias analysis in this" + " function")); +#endif /// Hidden option to stress test load slicing, i.e., when this option /// is enabled, load slicing bypasses most of its profitability guards. 
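The hunk above introduces hidden DAG-combiner flags, including a debug-only `combiner-aa-only-func` string option. The way that option gates alias analysis only shows up much later in this diff (in `visitLOAD` and `visitSTORE`), so here is a minimal sketch of the idiom in isolation; `shouldUseCombinerAA` and its parameters are illustrative names, not part of the patch:

```cpp
// Sketch only: the gating idiom behind -combiner-aa-only-func. A hidden
// string option that, when set, restricts combiner alias analysis to the
// one named function (debug builds only in the actual patch).
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include <string>

static llvm::cl::opt<std::string>
    AAOnlyFunc("combiner-aa-only-func", llvm::cl::Hidden,
               llvm::cl::desc("Only use DAG-combiner alias analysis in this"
                              " function"));

// FuncName would come from DAG.getMachineFunction().getName() in the real
// code; UseAA is the CombinerAA/subtarget default computed beforehand.
static bool shouldUseCombinerAA(bool UseAA, llvm::StringRef FuncName) {
  // Flag was given and names a different function: force AA off there.
  if (AAOnlyFunc.getNumOccurrences() && AAOnlyFunc != FuncName)
    return false;
  return UseAA;
}
```

Restricting an experimental combine to a single function this way makes it practical to bisect miscompiles: turn the feature on globally, then narrow a failure down function by function.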
@@ -212,6 +229,7 @@ namespace { SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); + SDValue visitRotate(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); @@ -257,11 +275,12 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); - SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); @@ -271,6 +290,11 @@ namespace { bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, SDLoc DL, bool foldBooleans = true); + + bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const; + bool isOneUseSetCC(SDValue N) const; + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); @@ -280,6 +304,10 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue InnerPos, SDValue InnerNeg, + unsigned PosOpcode, unsigned NegOpcode, + SDLoc DL); SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); @@ -326,6 +354,14 @@ namespace { /// \return True if some memory operations were changed. bool MergeConsecutiveStores(StoreSDNode *N); + /// \brief Try to transform a truncation where C is a constant: + /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) + /// + /// \p N needs to be a truncation and its first operand an AND. Other + /// requirements are checked by the function (e.g. that trunc is + /// single-use) and if missed an empty SDValue is returned. + SDValue distributeTruncateThroughAnd(SDNode *N); + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), @@ -378,7 +414,7 @@ public: explicit WorkListRemover(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { DC.removeFromWorkList(N); } }; @@ -566,79 +602,121 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } - // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc -// that selects between the values 1 and 0, making it equivalent to a setcc. -// Also, set the incoming LHS, RHS, and CC references to the appropriate -// nodes based on the type of node we are checking. This simplifies life a -// bit for the callers. -static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) { +// that selects between the target values used for true and false, making it +// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to +// the appropriate nodes based on the type of node we are checking. This +// simplifies life a bit for the callers. 
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const { if (N.getOpcode() == ISD::SETCC) { LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(2); return true; } - if (N.getOpcode() == ISD::SELECT_CC && - N.getOperand(2).getOpcode() == ISD::Constant && - N.getOperand(3).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && - cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { - LHS = N.getOperand(0); - RHS = N.getOperand(1); - CC = N.getOperand(4); - return true; - } - return false; + + if (N.getOpcode() != ISD::SELECT_CC || + !TLI.isConstTrueVal(N.getOperand(2).getNode()) || + !TLI.isConstFalseVal(N.getOperand(3).getNode())) + return false; + + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; } // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only // one use. If this is true, it allows the users to invert the operation for // free when it is profitable to do so. -static bool isOneUseSetCC(SDValue N) { +bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) return true; return false; } +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. +static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + +// \brief Returns the SDNode if it is a constant BuildVector or constant. +static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { + if (isa<ConstantSDNode>(N)) + return N.getNode(); + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if(BV && BV->isConstant()) + return BV; + return NULL; +} + +// \brief Returns the SDNode if it is a constant splat BuildVector or constant +// int. +static ConstantSDNode *isConstOrConstSplat(SDValue N) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) + return BV->getConstantSplatValue(); + + return nullptr; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); - if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { - if (isa<ConstantSDNode>(N1)) { - // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, VT, - cast<ConstantSDNode>(N0.getOperand(1)), - cast<ConstantSDNode>(N1)); - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); - } - if (N0.hasOneUse()) { - // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, - N0.getOperand(0), N1); - AddToWorkList(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); - } - } - - if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { - if (isa<ConstantSDNode>(N0)) { - // reassoc. 
(op c2, (op x, c1)) -> (op x, (op c1, c2)) - SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, VT, - cast<ConstantSDNode>(N1.getOperand(1)), - cast<ConstantSDNode>(N0)); - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); - } - if (N1.hasOneUse()) { - // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, - N1.getOperand(0), N0); - AddToWorkList(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + if (N0.getOpcode() == Opc) { + if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R); + if (!OpNode.getNode()) + return SDValue(); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } + if (N0.hasOneUse()) { + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); + if (!OpNode.getNode()) + return SDValue(); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + } + + if (N1.getOpcode() == Opc) { + if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L); + if (!OpNode.getNode()) + return SDValue(); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } + if (N1.hasOneUse()) { + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); + if (!OpNode.getNode()) + return SDValue(); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } } } @@ -1148,6 +1226,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SHL: return visitSHL(N); case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); + case ISD::ROTR: + case ISD::ROTL: return visitRotate(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1193,6 +1273,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); } return SDValue(); } @@ -1507,8 +1588,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ + if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + } } } @@ -1778,22 +1861,6 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } -/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose -/// elements are all the same constant or undefined. 
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { - BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); - if (!C) - return false; - - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - EVT EltVT = N->getValueType(0).getVectorElementType(); - return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs) && - EltVT.getSizeInBits() >= SplatBitSize); -} - SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2229,7 +2296,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, bool HiExists = N->hasAnyUseOfValue(1); if (!HiExists && (!LegalOperations || - TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); @@ -2454,35 +2521,66 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // The type-legalizer generates this pattern when loading illegal // vector types from memory. In many cases this allows additional shuffle // optimizations. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N0.getOperand(1).getOpcode() == ISD::UNDEF && - N1.getOperand(1).getOpcode() == ISD::UNDEF) { + // There are other cases where moving the shuffle after the xor/and/or + // is profitable even if shuffles don't perform a swizzle. + // If both shuffles use the same mask, and both shuffles have the same first + // or second operand, then it might still be profitable to move the shuffle + // after the xor/and/or operation. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); - assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && "Inputs to shuffles are not the same type"); - - unsigned NumElts = VT.getVectorNumElements(); - + // Check that both shuffles use the same mask. The masks are known to be of // the same length because the result vector type is the same. - bool SameMask = true; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx0 = SVN0->getMaskElt(i); - int Idx1 = SVN1->getMaskElt(i); - if (Idx0 != Idx1) { - SameMask = false; - break; + // Check also that shuffles have only one use to avoid introducing extra + // instructions. + if (SVN0->hasOneUse() && SVN1->hasOneUse() && + SVN0->getMask().equals(SVN1->getMask())) { + SDValue ShOp = N0->getOperand(1); + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. 
+ if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, VT); + else + ShOp = SDValue(); } - } - if (SameMask) { - SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(Op.getNode()); - return DAG.getVectorShuffle(VT, SDLoc(N), Op, - DAG.getUNDEF(VT), &SVN0->getMask()[0]); + // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) + // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) + // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) + if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(0), N1->getOperand(0)); + AddToWorkList(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, + &SVN0->getMask()[0]); + } + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. + ShOp = N0->getOperand(0); + if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, VT); + else + ShOp = SDValue(); + } + + // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) + // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) + // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) + if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(1), N1->getOperand(1)); + AddToWorkList(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, + &SVN0->getMask()[0]); + } } } @@ -3151,6 +3249,60 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return N0; if (ISD::isBuildVectorAllOnes(N1.getNode())) return N1; + + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) + // Do this only if the resulting shuffle is legal. + if (isa<ShuffleVectorSDNode>(N0) && + isa<ShuffleVectorSDNode>(N1) && + N0->getOperand(1) == N1->getOperand(1) && + ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { + bool CanFold = true; + unsigned NumElts = VT.getVectorNumElements(); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + // We construct two shuffle masks: + // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand + // and N1 as the second operand. + // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand + // and N0 as the second operand. + // We do this because OR is commutable and therefore there might be + // two ways to fold this node into a shuffle. + SmallVector<int,4> Mask1; + SmallVector<int,4> Mask2; + + for (unsigned i = 0; i != NumElts && CanFold; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Both shuffle indexes are undef. Propagate Undef. + if (M0 < 0 && M1 < 0) { + Mask1.push_back(M0); + Mask2.push_back(M0); + continue; + } + + if (M0 < 0 || M1 < 0 || + (M0 < (int)NumElts && M1 < (int)NumElts) || + (M0 >= (int)NumElts && M1 >= (int)NumElts)) { + CanFold = false; + break; + } + + Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); + Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); + } + + if (CanFold) { + // Fold this sequence only if the resulting shuffle is 'legal'. 
+ if (TLI.isShuffleMaskLegal(Mask1, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), + N1->getOperand(0), &Mask1[0]); + if (TLI.isShuffleMaskLegal(Mask2, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), + N0->getOperand(0), &Mask2[0]); + } + } } // fold (or x, undef) -> -1 @@ -3192,11 +3344,14 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); - if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) + if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { + SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1); + if (!COR.getNode()) + return SDValue(); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + N0.getOperand(0), N1), COR); + } } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ @@ -3302,6 +3457,155 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { return false; } +// Return true if we can prove that, whenever Neg and Pos are both in the +// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that +// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: +// +// (or (shift1 X, Neg), (shift2 X, Pos)) +// +// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate +// in direction shift1 by Neg. The range [0, OpSize) means that we only need +// to consider shift amounts with defined behavior. +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { + // If OpSize is a power of 2 then: + // + // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) + // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). + // + // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check + // for the stronger condition: + // + // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] + // + // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) + // we can just replace Neg with Neg' for the rest of the function. + // + // In other cases we check for the even stronger condition: + // + // Neg == OpSize - Pos [B] + // + // for all Neg and Pos. Note that the (or ...) then invokes undefined + // behavior if Pos == 0 (and consequently Neg == OpSize). + // + // We could actually use [A] whenever OpSize is a power of 2, but the + // only extra cases that it would match are those uninteresting ones + // where Neg and Pos are never in range at the same time. E.g. for + // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // as well as (sub 32, Pos), but: + // + // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) + // + // always invokes undefined behavior for 32-bit X. + // + // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. + unsigned MaskLoBits = 0; + if (Neg.getOpcode() == ISD::AND && + isPowerOf2_64(OpSize) && + Neg.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { + Neg = Neg.getOperand(0); + MaskLoBits = Log2_64(OpSize); + } + + // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 
+ if (Neg.getOpcode() != ISD::SUB) + return 0; + ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); + if (!NegC) + return 0; + SDValue NegOp1 = Neg.getOperand(1); + + // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with + // Pos'. The truncation is redundant for the purpose of the equality. + if (MaskLoBits && + Pos.getOpcode() == ISD::AND && + Pos.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) + Pos = Pos.getOperand(0); + + // The condition we need is now: + // + // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask + // + // If NegOp1 == Pos then we need: + // + // OpSize & Mask == NegC & Mask + // + // (because "x & Mask" is a truncation and distributes through subtraction). + APInt Width; + if (Pos == NegOp1) + Width = NegC->getAPIntValue(); + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. + // Then the condition we want to prove becomes: + // + // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask + // + // which, again because "x & Mask" is a truncation, becomes: + // + // NegC & Mask == (OpSize - PosC) & Mask + // OpSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && + Pos.getOperand(0) == NegOp1 && + Pos.getOperand(1).getOpcode() == ISD::Constant) + Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + + NegC->getAPIntValue()); + else + return false; + + // Now we just need to check that OpSize & Mask == Width & Mask. + if (MaskLoBits) + // Opsize & Mask is 0 since Mask is Opsize - 1. + return Width.getLoBits(MaskLoBits) == 0; + return Width == OpSize; +} + +// A subroutine of MatchRotate used once we have found an OR of two opposite +// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces +// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the +// former being preferred if supported. InnerPos and InnerNeg are Pos and +// Neg with outer conversions stripped away. +SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, + SDValue Neg, SDValue InnerPos, + SDValue InnerNeg, unsigned PosOpcode, + unsigned NegOpcode, SDLoc DL) { + // fold (or (shl x, (*ext y)), + // (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) or (rotr x, (sub 32, y)) + // + // fold (or (shl x, (*ext (sub 32, y))), + // (srl x, (*ext y))) -> + // (rotr x, y) or (rotl x, (sub 32, y)) + EVT VT = Shifted.getValueType(); + if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { + bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); + return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, + HasPos ? Pos : Neg).getNode(); + } + + // fold (or (shl (*ext x), (*ext y)), + // (srl (*ext x), (*ext (sub 32, y)))) -> + // (*ext (rotl x, y)) or (*ext (rotr x, (sub 32, y))) + // + // fold (or (shl (*ext x), (*ext (sub 32, y))), + // (srl (*ext x), (*ext y))) -> + // (*ext (rotr x, y)) or (*ext (rotl x, (sub 32, y))) + if (Shifted.getOpcode() == ISD::ZERO_EXTEND || + Shifted.getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerShifted = Shifted.getOperand(0); + EVT InnerVT = InnerShifted.getValueType(); + bool HasPosInner = TLI.isOperationLegalOrCustom(PosOpcode, InnerVT); + if (HasPosInner || TLI.isOperationLegalOrCustom(NegOpcode, InnerVT)) { + if (matchRotateSub(InnerPos, InnerNeg, InnerVT.getSizeInBits())) { + SDValue V = DAG.getNode(HasPosInner ? PosOpcode : NegOpcode, DL, + InnerVT, InnerShifted, HasPosInner ? 
Pos : Neg); + return DAG.getNode(Shifted.getOpcode(), DL, VT, V).getNode(); + } + } + } + + return 0; +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. @@ -3342,6 +3646,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { unsigned OpSizeInBits = VT.getSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) @@ -3395,28 +3700,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) { - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotl x, y) - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } else if (LExtOp0.getOpcode() == ISD::SUB && - RExtOp0 == LExtOp0.getOperand(1)) { - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotr x, y) - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } + SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); + if (TryL) + return TryL; + + SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); + if (TryR) + return TryR; return 0; } @@ -3559,7 +3851,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// visitShiftByConstant - Handle transforms common to the three shifts, when /// the shift amount is a constant. -SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { + // We can't and shouldn't fold opaque constants. + if (Amt->isOpaque()) + return SDValue(); + SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -3585,9 +3881,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { break; } - // We require the RHS of the binop to be a constant as well. + // We require the RHS of the binop to be a constant and not opaque as well. ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); - if (!BinOpCst) return SDValue(); + if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant. // If it is not a shift, it pessimizes some common cases like: @@ -3617,6 +3913,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), LHS->getOperand(1), N->getOperand(1)); + assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); // Create the new shift. 
SDValue NewShift = DAG.getNode(N->getOpcode(), @@ -3627,18 +3924,74 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); } +SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { + assert(N->getOpcode() == ISD::TRUNCATE); + assert(N->getOperand(0).getOpcode() == ISD::AND); + + // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) + if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { + SDValue N01 = N->getOperand(0).getOperand(1); + + if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + + return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, + DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), + DAG.getConstant(TruncC, TruncVT)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitRotate(SDNode *N) { + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). + if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && + N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); + if (NewOp1.getNode()) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), NewOp1); + } + return SDValue(); +} + SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); + // If setcc produces all-one true value then: + // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) + if (N1CV && N1CV->isConstant()) { + if (N0.getOpcode() == ISD::AND && + TLI.getBooleanContents(true) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); + + if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) { + SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); + if (C.getNode()) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); + } + } else { + N1C = isConstOrConstSplat(N1); + } + } } // fold (shl c1, c2) -> c1<<c2 @@ -3662,35 +4015,25 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getConstant(0, VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SHL && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) @@ -3701,20 +4044,21 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) && - N0.getOperand(0).getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); - if (c2 >= OpSizeInBits - InnerShiftSize) { - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N0), VT, - DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, - N0.getOperand(0)->getOperand(0)), - DAG.getConstant(c1 + c2, N1.getValueType())); + N0.getOperand(0).getOpcode() == ISD::SHL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0Op0.getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, SDLoc(N0), VT, + DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, + N0Op0->getOperand(0)), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } } @@ -3722,19 +4066,20 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Only fold this if the inner zext has no other uses to avoid increasing // the total number of instructions. 
if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && - N0.getOperand(0).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); - if (c1 < VT.getSizeInBits()) { - uint64_t c2 = N1C->getZExtValue(); - if (c1 == c2) { - SDValue NewOp0 = N0.getOperand(0); - EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), - NewOp0, DAG.getConstant(c2, CountVT)); - AddToWorkList(NewSHL.getNode()); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + N0.getOperand(0).getOpcode() == ISD::SRL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + if (c1 < VT.getScalarSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), + NewOp0, DAG.getConstant(c2, CountVT)); + AddToWorkList(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } } } } @@ -3743,40 +4088,39 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding // this will increase the total number of instructions. - if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - if (c1 < VT.getSizeInBits()) { - uint64_t c2 = N1C->getZExtValue(); - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - c1); - SDValue Shift; - if (c2 > c1) { - Mask = Mask.shl(c2-c1); - Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c2-c1, N1.getValueType())); - } else { - Mask = Mask.lshr(c1-c2); - Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1-c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + if (c1 < OpSizeInBits) { + uint64_t c2 = N1C->getZExtValue(); + APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); + SDValue Shift; + if (c2 > c1) { + Mask = Mask.shl(c2 - c1); + Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, N1.getValueType())); + } else { + Mask = Mask.lshr(c1 - c2); + Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, N1.getValueType())); + } + return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, + DAG.getConstant(Mask, VT)); } - return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, - DAG.getConstant(Mask, VT)); } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + unsigned BitSize = VT.getScalarSizeInBits(); SDValue HiBitsMask = - DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - - N1C->getZExtValue()), - VT); + DAG.getConstant(APInt::getHighBitsSet(BitSize, + BitSize - N1C->getZExtValue()), VT); return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), HiBitsMask); } if (N1C) { - SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSHL = visitShiftByConstant(N, N1C); if 
(NewSHL.getNode()) return NewSHL; } @@ -3796,6 +4140,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N1C = isConstOrConstSplat(N1); } // fold (sra c1, c2) -> (sra c1, c2) @@ -3829,11 +4175,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRA) { - if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; + if (Sum >= OpSizeInBits) + Sum = OpSizeInBits - 1; return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(Sum, N1C->getValueType(0))); + DAG.getConstant(Sum, N1.getValueType())); } } @@ -3842,14 +4189,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // result_size - n != m. // If truncate is free for the target sext(shl) is likely to result in better // code. - if (N0.getOpcode() == ISD::SHL) { + if (N0.getOpcode() == ISD::SHL && N1C) { // Get the two constanst of the shifts, CN0 = m, CN = n. - const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (N01C && N1C) { + const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); + if (N01C) { + LLVMContext &Ctx = *DAG.getContext(); // Determine what the truncate's result bitsize and type would be. - EVT TruncVT = - EVT::getIntegerVT(*DAG.getContext(), - OpSizeInBits - N1C->getZExtValue()); + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); + + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); + // Determine the residual right-shift amount. signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -3876,44 +4226,33 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), - TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } - } - - // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); + } + + // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) // if c1 is equal to the number of bits the trunc removes if (N0.getOpcode() == ISD::TRUNCATE && (N0.getOperand(0).getOpcode() == ISD::SRL || N0.getOperand(0).getOpcode() == ISD::SRA) && N0.getOperand(0).hasOneUse() && N0.getOperand(0).getOperand(1).hasOneUse() && - N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { - EVT LargeVT = N0.getOperand(0).getValueType(); - ConstantSDNode *LargeShiftAmt = - cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); - - if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == - LargeShiftAmt->getZExtValue()) { - SDValue Amt = - DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), - getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, - N0.getOperand(0).getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + N1C) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { + unsigned LargeShiftVal = LargeShift->getZExtValue(); + EVT LargeVT = N0Op0.getValueType(); + + if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDValue Amt = + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), + getShiftAmountTy(N0Op0.getOperand(0).getValueType())); + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, + N0Op0.getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + } } } @@ -3927,7 +4266,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C) { - SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSRA = visitShiftByConstant(N, N1C); if (NewSRA.getNode()) return NewSRA; } @@ -3947,6 +4286,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N1C = isConstOrConstSplat(N1); } // fold (srl c1, c2) -> c1 >>u c2 @@ -3967,14 +4308,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getConstant(0, VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SRL && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *N01C = 
isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N01C->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) @@ -3999,18 +4341,21 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl (shl x, c), c) -> (and x, cst2) - if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && - N0.getValueSizeInBits() <= 64) { - uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, VT)); + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { + unsigned BitSize = N0.getScalarValueSizeInBits(); + if (BitSize <= 64) { + uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, VT)); + } } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? EVT SmallVT = N0.getOperand(0).getValueType(); - if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) + unsigned BitSize = SmallVT.getScalarSizeInBits(); + if (N1C->getZExtValue() >= BitSize) return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { @@ -4019,7 +4364,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt); + APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), DAG.getConstant(Mask, VT)); @@ -4028,14 +4373,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign // bit, which is unmodified by sra. - if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { + if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) { if (N0.getOpcode() == ISD::SRA) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). if (N1C && N0.getOpcode() == ISD::CTLZ && - N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { + N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { APInt KnownZero, KnownOne; DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); @@ -4070,22 +4415,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), - TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } // fold operands of srl based on knowledge that the low bits are not @@ -4094,7 +4427,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(N, 0); if (N1C) { - SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSRL = visitShiftByConstant(N, N1C); if (NewSRL.getNode()) return NewSRL; } @@ -4275,12 +4608,12 @@ static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the inputs. SDValue Lo, Hi, LL, LH, RL, RH; - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -4338,9 +4671,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return SDValue(); SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; - llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); + std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); @@ -4353,6 +4686,13 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } + // Fold (vselect (build_vector all_ones), N1, N2) -> N1 + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N2; + return SDValue(); } @@ -4402,6 +4742,65 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } +// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext +// dag node into a ConstantSDNode or a build_vector of constants. +// This function is called by the DAGCombiner when visiting sext/zext/aext +// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). +// Vector extends are not folded if operations are legal; this is to +// avoid introducing illegal build_vector dag nodes. 
+static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, + SelectionDAG &DAG, bool LegalTypes, + bool LegalOperations) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || + Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); + + // fold (sext c1) -> c1 + // fold (zext c1) -> c1 + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); + + // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) + EVT SVT = VT.getScalarType(); + if (!(VT.isVector() && + (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) + return 0; + + // We can fold this node into a build_vector. + unsigned VTBits = SVT.getSizeInBits(); + unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); + unsigned ShAmt = VTBits - EVTBits; + SmallVector<SDValue, 8> Elts; + unsigned NumElts = N0->getNumOperands(); + SDLoc DL(N); + + for (unsigned i=0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(DAG.getUNDEF(SVT)); + continue; + } + + ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); + const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); + if (Opcode == ISD::SIGN_EXTEND) + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), + SVT)); + else + Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), + SVT)); + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], NumElts).getNode(); +} + // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. 
Returns true if extension are possible and the above @@ -4492,9 +4891,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (sext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) @@ -4671,7 +5070,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) unsigned ElementWidth = VT.getScalarType().getSizeInBits(); SDValue NegOne = DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); @@ -4680,15 +5079,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!VT.isVector() && - (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { - return DAG.getSelect(SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), - getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - NegOne, DAG.getConstant(0, VT)); + + if (!VT.isVector()) { + EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); + if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { + SDLoc DL(N); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + SDValue SetCC = DAG.getSetCC(DL, + SetCCVT, + N0.getOperand(0), N0.getOperand(1), CC); + EVT SelectVT = getSetCCResultType(VT); + return DAG.getSelect(DL, VT, + DAG.getSExtOrTrunc(SetCC, DL, SelectVT), + NegOne, DAG.getConstant(0, VT)); + + } } } @@ -4742,9 +5147,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (zext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) @@ -4925,10 +5331,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { - if (!LegalOperations && VT.isVector()) { + if (!LegalOperations && VT.isVector() && + N0.getValueType().getVectorElementType() == MVT::i1) { + EVT N0VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N0VT) == N0.getValueType()) + return SDValue(); + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. 
- EVT N0VT = N0.getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), DAG.getConstant(1, EltVT)); @@ -5007,9 +5417,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (aext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) @@ -5466,6 +5877,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { BSwap, N1); } + // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs + // into a build_vector. + if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SmallVector<SDValue, 8> Elts; + unsigned NumElts = N0->getNumOperands(); + unsigned ShAmt = VTBits - EVTBits; + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(Op); + continue; + } + + ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); + const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), + Op.getValueType())); + } + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts); + } + return SDValue(); } @@ -5510,7 +5944,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // creates this pattern) and before operation legalization after which // we need to be more careful about the vector instructions that we generate. if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - LegalTypes && !LegalOperations && N0->hasOneUse()) { + LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { EVT VecTy = N0.getOperand(0).getValueType(); EVT ExTy = N0.getValueType(); @@ -5587,6 +6021,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue Reduced = ReduceLoadWidth(N); if (Reduced.getNode()) return Reduced; + // Handle the case where the load remains an extending load even + // after truncation. + if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (!LN0->isVolatile() && + LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { + SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getMemoryVT(), + LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); + return NewLoad; + } + } } // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), // where ... are all 'undef'. 
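Both `tryToFoldExtendOfConstant` and the new `sext_inreg`-of-`build_vector` fold above extend each constant element with the same shift-up-then-shift-back trick on `APInt`. A standalone check of that arithmetic, with made-up widths and values:

```cpp
// Sanity check of the extension idiom used by tryToFoldExtendOfConstant
// and the sext_inreg build_vector fold: shifting left by VTBits - EVTBits
// and arithmetic-shifting back sign-extends the low EVTBits bits; using
// lshr for the shift back zero-extends them instead.
#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  const unsigned VTBits = 32, EVTBits = 8;
  const unsigned ShAmt = VTBits - EVTBits;
  llvm::APInt C(VTBits, 0xF0); // 0xF0 has its i8 sign bit set

  assert(C.shl(ShAmt).ashr(ShAmt).getSExtValue() == -16);  // sext i8 0xF0
  assert(C.shl(ShAmt).lshr(ShAmt).getZExtValue() == 0xF0); // zext i8 0xF0
  return 0;
}
```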
@@ -5654,8 +6102,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || - LD1->getPointerInfo().getAddrSpace() != - LD2->getPointerInfo().getAddrSpace()) + LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); @@ -5691,14 +6138,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (!LegalTypes && N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && VT.isVector()) { - bool isSimple = true; - for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) - if (N0.getOperand(i).getOpcode() != ISD::UNDEF && - N0.getOperand(i).getOpcode() != ISD::Constant && - N0.getOperand(i).getOpcode() != ISD::ConstantFP) { - isSimple = false; - break; - } + bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); EVT DestEltVT = N->getValueType(0).getVectorElementType(); assert(!DestEltVT.isVector() && @@ -6551,7 +6991,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desireable only if SELECT_CC can be lowered. + // The next optimizations are desirable only if SELECT_CC can be lowered. // Check against MVT::Other for SELECT_CC, which is a workaround for targets // having to say they don't support SELECT_CC on every type the DAG knows // about, since there is no way to mark an opcode illegal at all value types @@ -6608,7 +7048,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desireable only if SELECT_CC can be lowered. + // The next optimizations are desirable only if SELECT_CC can be lowered. // Check against MVT::Other for SELECT_CC, which is a workaround for targets // having to say they don't support SELECT_CC on every type the DAG knows // about, since there is no way to mark an opcode illegal at all value types @@ -7537,7 +7977,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && LD->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -7888,14 +8333,6 @@ struct LoadedSlice { }; } -/// \brief Sorts LoadedSlice according to their offset. -struct LoadedSliceSorter { - bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { - assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); - return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); - } -}; - /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { @@ -7939,7 +8376,11 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, // Sort the slices so that elements that are likely to be next to each // other in memory are next to each other in the list. 
- std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + std::sort(LoadedSlices.begin(), LoadedSlices.end(), + [](const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + }); const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); // First (resp. Second) is the first (resp. Second) potentially candidate // to be placed in a paired load. @@ -8075,8 +8516,8 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // The width of the type must be a power of 2 and greater than 8-bits. // Otherwise the load cannot be represented in LLVM IR. - // Moreover, if we shifted with a non 8-bits multiple, the slice - // will be accross several bytes. We do not support that. + // Moreover, if we shifted with a non-8-bits multiple, the slice + // will be across several bytes. We do not support that. unsigned Width = User->getValueSizeInBits(0); if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) return 0; @@ -8543,14 +8984,6 @@ struct MemOpLink { unsigned SequenceNum; }; -/// Sorts store nodes in a link according to their offset from a shared -// base ptr. -struct ConsecutiveMemoryChainSorter { - bool operator()(MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; - } -}; - bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; @@ -8669,7 +9102,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Sort the memory operands according to their distance from the base pointer. std::sort(StoreNodes.begin(), StoreNodes.end(), - ConsecutiveMemoryChainSorter()); + [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase || + (LHS.OffsetFromBase == RHS.OffsetFromBase && + LHS.SequenceNum > RHS.SequenceNum); + }); // Scan the memory operations on the chain and find the first non-consecutive // store memory address. @@ -8717,7 +9154,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { NonZero |= !C->getConstantFPValue()->isNullValue(); } else { - // Non constant. + // Non-constant. break; } @@ -9125,7 +9562,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && ST->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -9306,9 +9748,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD // patterns. For example on AVX, extracting elements from a wide vector - // without using extract_subvector. + // without using extract_subvector. However, if we can find an underlying + // scalar value, then we can always use that. 
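// For instance (illustrative values): extracting lane 2 of
// shuffle(A, B, <0,5,6,3>) reads mask entry 2, whose value 6 selects
// lane 6 - 4 = 2 of B; when B is a build_vector, that scalar operand
// can be returned directly (with a sext/trunc if the element types
// differ), which stays valid even after operation legalization.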
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE - && ConstEltNo && !LegalOperations) { + && ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); @@ -9320,16 +9763,32 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getUNDEF(NVT); // Select the right vector half to extract from. + SDValue SVInVec; if (OrigElt < NumElem) { - InVec = InVec->getOperand(0); + SVInVec = InVec->getOperand(0); } else { - InVec = InVec->getOperand(1); + SVInVec = InVec->getOperand(1); OrigElt -= NumElem; } - EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - InVec, DAG.getConstant(OrigElt, IndexTy)); + if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { + SDValue InOp = SVInVec.getOperand(OrigElt); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); + } + + return InOp; + } + + // FIXME: We should handle recursing on other vector shuffles and + // scalar_to_vector here as well. + + if (!LegalOperations) { + EVT IndexTy = TLI.getVectorIdxTy(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, + SVInVec, DAG.getConstant(OrigElt, IndexTy)); + } } // Perform only after legalization to ensure build_vector / vector_shuffle @@ -9836,6 +10295,26 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) + // -> (BUILD_VECTOR A, B, ..., C, D, ...) + if (N->getNumOperands() == 2 && + N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && + N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SmallVector<SDValue, 8> Opnds; + unsigned BuildVecNumElts = N0.getNumOperands(); + + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N0.getOperand(i)); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N1.getOperand(i)); + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], + Opnds.size()); + } + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -10142,6 +10621,33 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N2 = N->getOperand(2); + + // If the input vector is a concatenation, and the insert replaces + // one of the halves, we can optimize into a single concat_vectors. 
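// Concretely, for a result type with N lanes and X, Y, Z of N/2 lanes:
//   insert_subvector(concat_vectors(X, Y), Z, 0)   -> concat_vectors(Z, Y)
//   insert_subvector(concat_vectors(X, Y), Z, N/2) -> concat_vectors(X, Z)
// Any other (or non-constant) insertion index is left untouched.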
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && + N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { + APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); + EVT VT = N->getValueType(0); + + // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors Z, Y) + if (InsIdx == 0) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N->getOperand(1), N0.getOperand(1)); + + // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors X, Z) + if (InsIdx == VT.getVectorNumElements()/2) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N0.getOperand(0), N->getOperand(1)); + } + + return SDValue(); +} + /// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -10204,18 +10710,15 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // this operation. if (LHS.getOpcode() == ISD::BUILD_VECTOR && RHS.getOpcode() == ISD::BUILD_VECTOR) { + // Check if both vectors are constants. If not bail out. + if (!(cast<BuildVectorSDNode>(LHS)->isConstant() && + cast<BuildVectorSDNode>(RHS)->isConstant())) + return SDValue(); + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { SDValue LHSOp = LHS.getOperand(i); SDValue RHSOp = RHS.getOperand(i); - // If these two elements can't be folded, bail out. - if ((LHSOp.getOpcode() != ISD::UNDEF && - LHSOp.getOpcode() != ISD::Constant && - LHSOp.getOpcode() != ISD::ConstantFP) || - (RHSOp.getOpcode() != ISD::UNDEF && - RHSOp.getOpcode() != ISD::Constant && - RHSOp.getOpcode() != ISD::ConstantFP)) - break; // Can't fold divide by zero. if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || @@ -10862,14 +11365,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif if (UseAA && SrcValue1 && SrcValue2) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; AliasAnalysis::AliasResult AAResult = - AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), - AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2)); + AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, + UseTBAA ? TBAAInfo1 : 0), + AliasAnalysis::Location(SrcValue2, Overlap2, + UseTBAA ? TBAAInfo2 : 0)); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -10956,7 +11466,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, if (Depth > 6 || Aliases.size() == 2) { Aliases.clear(); Aliases.push_back(OriginalChain); - break; + return; } // Don't bother if we've been before. @@ -11018,6 +11528,63 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, break; } } + + // We need to be careful here to also search for aliases through the + // value operand of a store, etc. Consider the following situation: + // Token1 = ...
+ // L1 = load Token1, %52 + // S1 = store Token1, L1, %51 + // L2 = load Token1, %52+8 + // S2 = store Token1, L2, %51+8 + // Token2 = Token(S1, S2) + // L3 = load Token2, %53 + // S3 = store Token2, L3, %52 + // L4 = load Token2, %53+8 + // S4 = store Token2, L4, %52+8 + // If we search for aliases of S3 (which loads address %52), and we look + // only through the chain, then we'll miss the trivial dependence on L1 + // (which also loads from %52). We then might change all loads and + // stores to use Token1 as their chain operand, which could result in + // copying %53 into %52 before copying %52 into %51 (which should + // happen first). + // + // The problem is, however, that searching for such data dependencies + // can become expensive, and the cost is not directly related to the + // chain depth. Instead, we'll rule out such configurations here by + // insisting that we've visited all chain users (except for users + // of the original chain, which is not necessary). When doing this, + // we need to look through nodes we don't care about (otherwise, things + // like register copies will interfere with trivial cases). + + SmallVector<const SDNode *, 16> Worklist; + for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(), + IE = Visited.end(); I != IE; ++I) + if (*I != OriginalChain.getNode()) + Worklist.push_back(*I); + + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + + // We have already visited M, and want to make sure we've visited any uses + // of M that we care about. For uses that we've not visited, and don't + // care about, queue them to the worklist. + + for (SDNode::use_iterator UI = M->use_begin(), + UIE = M->use_end(); UI != UIE; ++UI) + if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) { + if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) { + // We've not visited this use, and we care about it (it could have an + // ordering dependency with the original node). + Aliases.clear(); + Aliases.push_back(OriginalChain); + return; + } + + // We've not visited this use, but we don't care about it. Mark it as + // visited and enqueue it to the worklist. + Worklist.push_back(*UI); + } + } } /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index a6f746140d..baba51eaf2 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -49,8 +49,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" @@ -118,7 +118,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { // No-op casts are trivially coalesced by fast-isel.
if (const CastInst *Cast = dyn_cast<CastInst>(I)) - if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) && + if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && !hasTrivialKill(Cast->getOperand(0))) return false; @@ -133,7 +133,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { !(I->getOpcode() == Instruction::BitCast || I->getOpcode() == Instruction::PtrToInt || I->getOpcode() == Instruction::IntToPtr) && - cast<Instruction>(*I->use_begin())->getParent() == I->getParent(); + cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); } unsigned FastISel::getRegForValue(const Value *V) { @@ -192,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. Reg = - getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); + getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { if (CF->isNullValue()) { Reg = TargetMaterializeFloatZero(CF); @@ -229,7 +229,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { Reg = lookUpRegForValue(Op); } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } @@ -335,20 +335,20 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I, FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; - DebugLoc OldDL = DL; + DebugLoc OldDL = DbgLoc; recomputeInsertPt(); - DL = DebugLoc(); + DbgLoc = DebugLoc(); SavePoint SP = { OldInsertPt, OldDL }; return SP; } void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) - LastLocalValue = llvm::prior(FuncInfo.InsertPt); + LastLocalValue = std::prev(FuncInfo.InsertPt); // Restore the previous insert position. 
FuncInfo.InsertPt = OldInsertPt.InsertPt; - DL = OldInsertPt.DL; + DbgLoc = OldInsertPt.DL; } /// SelectBinaryOp - Select and emit code for a binary operator instruction, @@ -484,7 +484,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset - TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) @@ -503,7 +503,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { if (CI->isZero()) continue; // N = N + Offset TotalOffs += - TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) @@ -524,7 +524,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { } // N = N + Idx * ElementSize; - uint64_t ElementSize = TD.getTypeAllocSize(Ty); + uint64_t ElementSize = DL.getTypeAllocSize(Ty); std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); unsigned IdxN = Pair.first; bool IdxNIsKill = Pair.second; @@ -572,7 +572,7 @@ bool FastISel::SelectCall(const User *I) { if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) .addExternalSymbol(IA->getAsmString().c_str()) .addImm(ExtraInfo); @@ -643,11 +643,11 @@ bool FastISel::SelectCall(const User *I) { if (Op) { if (Op->isReg()) { Op->setIsDebug(true); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, DI->getVariable()); } else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(0) @@ -667,26 +667,26 @@ bool FastISel::SelectCall(const User *I) { if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(0U).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. 
bool IsIndirect = DI->getOffset() != 0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, DI->getOffset(), DI->getVariable()); } else { // We can't yet handle anything else here because it would require @@ -798,8 +798,8 @@ bool FastISel::SelectBitCast(const User *I) { // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Op0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0); } } @@ -822,7 +822,7 @@ FastISel::SelectInstruction(const Instruction *I) { if (!HandlePHINodesInSuccessorBlocks(I->getParent())) return false; - DL = I->getDebugLoc(); + DbgLoc = I->getDebugLoc(); MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; @@ -840,7 +840,7 @@ FastISel::SelectInstruction(const Instruction *I) { // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; - DL = DebugLoc(); + DbgLoc = DebugLoc(); return true; } // Remove dead code. However, ignore call instructions since we've flushed @@ -855,7 +855,7 @@ FastISel::SelectInstruction(const Instruction *I) { SavedInsertPt = FuncInfo.InsertPt; if (TargetSelectInstruction(I)) { ++NumFastIselSuccessTarget; - DL = DebugLoc(); + DbgLoc = DebugLoc(); return true; } // Check for dead code and remove as necessary. @@ -863,7 +863,7 @@ FastISel::SelectInstruction(const Instruction *I) { if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); - DL = DebugLoc(); + DbgLoc = DebugLoc(); return false; } @@ -871,7 +871,7 @@ FastISel::SelectInstruction(const Instruction *I) { /// unless it is the immediate (fall-through) successor, and update /// the CFG. void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { @@ -881,7 +881,7 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { } else { // The unconditional branch case. 
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, - SmallVector<MachineOperand, 0>(), DL); + SmallVector<MachineOperand, 0>(), DbgLoc); } FuncInfo.MBB->addSuccessor(MSucc); } @@ -1096,7 +1096,7 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo, MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), TM(FuncInfo.MF->getTarget()), - TD(*TM.getDataLayout()), + DL(*TM.getDataLayout()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), TRI(*TM.getRegisterInfo()), @@ -1209,7 +1209,7 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); return ResultReg; } @@ -1220,13 +1220,13 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; @@ -1240,15 +1240,15 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1262,17 +1262,17 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1285,15 +1285,15 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1306,17 +1306,17 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm1) .addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm1) .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1329,15 +1329,15 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1351,17 +1351,17 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1375,17 +1375,17 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm1).addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, 
Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm1).addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1397,11 +1397,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1413,12 +1413,12 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addImm(Imm1).addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1432,7 +1432,7 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(TargetOpcode::COPY), ResultReg) + DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } @@ -1498,9 +1498,9 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise. - DL = PN->getDebugLoc(); + DbgLoc = PN->getDebugLoc(); if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) - DL = Inst->getDebugLoc(); + DbgLoc = Inst->getDebugLoc(); unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { @@ -1508,7 +1508,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); - DL = DebugLoc(); + DbgLoc = DebugLoc(); } } @@ -1523,7 +1523,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { // this by scanning the single-use users of the load until we get to FoldInst. unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. - const Instruction *TheUser = LI->use_back(); + const Instruction *TheUser = LI->user_back(); while (TheUser != FoldInst && // Scan up until we find FoldInst. // Stay in the right block. 
TheUser->getParent() == FoldInst->getParent() && @@ -1532,7 +1532,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { if (!TheUser->hasOneUse()) return false; - TheUser = TheUser->use_back(); + TheUser = TheUser->user_back(); } // If we didn't find the fold instruction, then we failed to collapse the @@ -1559,7 +1559,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { return false; MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); - MachineInstr *User = &*RI; + MachineInstr *User = RI->getParent(); // Set the insertion point properly. Folding the load can cause generation of // other random instructions (like sign extends) for addressing modes; make @@ -1576,8 +1576,8 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { if (!isa<AddOperator>(Add)) return false; // Type size needs to match. - if (TD.getTypeSizeInBits(GEP->getType()) != - TD.getTypeSizeInBits(Add->getType())) + if (DL.getTypeSizeInBits(GEP->getType()) != + DL.getTypeSizeInBits(Add->getType())) return false; // Must be in the same basic block. if (isa<Instruction>(Add) && diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 4309dc1d48..5f0006e237 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -32,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -46,16 +47,15 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { if (I->use_empty()) return false; if (isa<PHINode>(I)) return true; const BasicBlock *BB = I->getParent(); - for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - const User *U = *UI; + for (const User *U : I->users()) if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U)) return true; - } + return false; } -void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, + SelectionDAG *DAG) { const TargetLowering *TLI = TM.getTargetLowering(); Fn = &fn; @@ -74,7 +74,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // them. Function::const_iterator BB = Fn->begin(), EB = Fn->end(); for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + // Don't fold inalloca allocas or other dynamic allocas into the initial + // stack frame allocation, even if they are in the entry block. 
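// For example (illustrative C source, assuming the usual clang
// lowering):
//   void f(int n) { char buf[n]; use(buf); }
// allocates buf in the entry block but with a runtime size, so
// isStaticAlloca() is false and buf becomes a variable-sized frame
// object in the walk below instead of part of the fixed frame here.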
+ if (!AI->isStaticAlloca()) + continue; + if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); @@ -85,21 +90,51 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. - // The object may need to be placed onto the stack near the stack - // protector if one exists. Determine here if this object is a suitable - // candidate. I.e., it would trigger the creation of a stack protector. - bool MayNeedSP = - (AI->isArrayAllocation() || - (TySize >= 8 && isa<ArrayType>(Ty) && - cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP, AI); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); } + } for (; BB != EB; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Look for dynamic allocas. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + if (!AI->isStaticAlloca()) { + unsigned Align = std::max( + (unsigned)TLI->getDataLayout()->getPrefTypeAlignment( + AI->getAllocatedType()), + AI->getAlignment()); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + if (Align <= StackAlign) + Align = 0; + // Inform the Frame Information that we have variable-sized objects. + MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI); + } + } + + // Look for inline asm that clobbers the SP register. + if (isa<CallInst>(I) || isa<InvokeInst>(I)) { + ImmutableCallSite CS(I); + if (isa<InlineAsm>(CS.getCalledValue())) { + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + std::vector<TargetLowering::AsmOperandInfo> Ops = + TLI->ParseConstraints(CS); + for (size_t I = 0, E = Ops.size(); I != E; ++I) { + TargetLowering::AsmOperandInfo &Op = Ops[I]; + if (Op.Type == InlineAsm::isClobber) { + // Clobbers don't have SDValue operands, hence SDValue(). + TLI->ComputeConstraintToUse(Op, SDValue(), DAG); + std::pair<unsigned, const TargetRegisterClass*> PhysReg = + TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, + Op.ConstraintVT); + if (PhysReg.first == SP) + MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); + } + } + } + } + // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 3a8fb85911..1c596b8c42 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -220,10 +220,19 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); - // If the register class is unknown for the given definition, then try to - // infer one from the value type. - if (!RC && i < NumResults) - RC = TLI->getRegClassFor(Node->getSimpleValueType(i)); + // Always let the value type influence the used register class. The + // constraints on the instruction may be too lax to represent the value + // type correctly. For example, a 64-bit float (X86::FR64) can't live in + // the 32-bit float super-class (X86::FR32). 
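// In other words, when the produced value type is legal, intersect the
// class the instruction definition permits with the class implied by
// the type, and prefer the common subclass when one exists. As an
// assumed x86 instance: a def whose constraint allows VR128 but whose
// result type is f64 would be narrowed to
// getCommonSubClass(VR128, FR64) == FR64.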
+ if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) { + const TargetRegisterClass *VTRC = + TLI->getRegClassFor(Node->getSimpleValueType(i)); + if (RC) + VTRC = TRI->getCommonSubClass(RC, VTRC); + if (VTRC) + RC = VTRC; + } + if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -731,10 +740,16 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned NumDefs = II.getNumDefs(); const uint16_t *ScratchRegs = NULL; - // Handle PATCHPOINT specially and then use the generic code. - if (Opc == TargetOpcode::PATCHPOINT) { - unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); - NumDefs = NumResults; + // Handle STACKMAP and PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { + // Stackmaps do not have arguments and do not preserve their calling + // convention. However, to simplify runtime support, they clobber the same + // scratch registers as AnyRegCC. + unsigned CC = CallingConv::AnyReg; + if (Opc == TargetOpcode::PATCHPOINT) { + CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + } ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9061ae9f76..20afb3d594 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -13,15 +13,16 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -152,10 +153,10 @@ private: public: // DAGUpdateListener implementation. - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { ForgetNode(N); } - virtual void NodeUpdated(SDNode *N) {} + void NodeUpdated(SDNode *N) override {} // Node replacement helpers void ReplacedNode(SDNode *N) { @@ -729,10 +730,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { MVT VT = Value.getSimpleValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. 
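// The address space is threaded through because a target may tolerate
// misaligned accesses in one address space but not another. A sketch of
// a hypothetical target override (MyTargetLowering and MY_LOCAL_AS are
// invented names):
//   bool MyTargetLowering::allowsUnalignedMemoryAccesses(
//       EVT VT, unsigned AddrSpace, bool *Fast) const {
//     return AddrSpace != MY_LOCAL_AS; // local memory needs alignment
//   }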
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + unsigned AS = ST->getAddressSpace(); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) @@ -740,6 +742,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG, TLI, this); } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) @@ -807,7 +810,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -818,7 +821,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); @@ -826,7 +829,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -840,16 +843,18 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), StVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { + unsigned AS = ST->getAddressSpace(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) @@ -889,10 +894,11 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { + unsigned AS = LD->getAddressSpace(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); @@ -901,6 +907,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(RVal, DAG); if (Res.getNode()) { @@ -1017,7 +1024,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1047,7 +1054,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1059,77 +1066,82 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH + isCustom = true; + // FALLTHROUGH case TargetLowering::Legal: { - Value = SDValue(Node, 0); - Chain = SDValue(Node, 1); - - if (isCustom) { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) { - Value = Res; - Chain = Res.getValue(1); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Value, Chain); - } - } - } - break; + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support + // it, expand it. + EVT MemVT = LD->getMemoryVT(); + unsigned AS = LD->getAddressSpace(); + if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) { + Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getDataLayout()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Value, Chain); + } + } + } + break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getMemOperand()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? 
- ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Chain = Load.getValue(1); - break; - } - - assert(!SrcVT.isVector() && - "Vector Loads are handled in LegalizeVectorOps"); - - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. - assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, SrcVT, - LD->getMemOperand()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Value = ValRes; - Chain = Result.getValue(1); - break; + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && + TLI.isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, + LD->getMemOperand()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- + // and zero-extend operations are currently folded into extending + // loads, whether they are legal or not, and then we end up here + // without any support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an + // explicit zero/sign extend inreg. + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0), + Chain, Ptr, SrcVT, + LD->getMemOperand()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, + SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; } } @@ -1383,10 +1395,39 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); SDLoc dl(Op); - // Store the value to a temporary stack slot, then LOAD the returned part. - SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + + // Before we generate a new store to a temporary stack slot, see if there is + // already one that we can use. There often is because when we scalarize + // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole + // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in + // the vector. 
If all are expanded here, we don't want one store per vector + // element. + SDValue StackPtr, Ch; + for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), + UE = Vec.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) { + if (ST->isIndexed() || ST->isTruncatingStore() || + ST->getValue() != Vec) + continue; + + // Make sure that nothing else could have stored into the destination of + // this store. + if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) + continue; + + StackPtr = ST->getBasePtr(); + Ch = SDValue(ST, 0); + break; + } + } + + if (!Ch.getNode()) { + // Store the value to a temporary stack slot, then LOAD the returned part. + StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + } // Add the offset to the index. unsigned EltSize = @@ -1530,9 +1571,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // the pointer so that the loaded integer will contain the sign bit. unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; - LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), - LoadPtr, - DAG.getConstant(ByteOffset, LoadPtr.getValueType())); + LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, + DAG.getConstant(ByteOffset, LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1555,8 +1595,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // Select between the nabs and abs value based on the sign bit of // the input. return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1776,6 +1816,98 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { false, false, false, 0); } +static bool +ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, + const TargetLowering &TLI, SDValue &Res) { + unsigned NumElems = Node->getNumOperands(); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + + // Try to group the scalars into pairs, shuffle the pairs together, then + // shuffle the pairs of pairs together, etc. until the vector has + // been built. This will work only if all of the necessary shuffle masks + // are legal. + + // We do this in two phases; first to check the legality of the shuffles, + // and next, assuming that all shuffles are legal, to create the new nodes. + for (int Phase = 0; Phase < 2; ++Phase) { + SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals, + NewIntermedVals; + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + + SDValue Vec; + if (Phase) + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V); + IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i))); + } + + while (IntermedVals.size() > 2) { + NewIntermedVals.clear(); + for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) { + // This vector and the next vector are shuffled together (simply to + // append the one to the other). 
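// Illustrative trace with four defined scalars a,b,c,d (NumElems == 4,
// u == undef): the first round shuffles <a,u,u,u> with <b,u,u,u> using
// mask <0,4,-1,-1> to produce <a,b,u,u> (final indices {0,1}), and
// likewise <c,d,u,u> ({2,3}); the closing shuffle of the two
// intermediates then uses mask <0,1,4,5> to place every scalar in its
// BUILD_VECTOR lane.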
+ SmallVector<int, 16> ShuffleVec(NumElems, -1); + + SmallVector<int, 16> FinalIndices; + FinalIndices.reserve(IntermedVals[i].second.size() + + IntermedVals[i+1].second.size()); + + int k = 0; + for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = j; + FinalIndices.push_back(IntermedVals[i].second[j]); + } + for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = NumElems + j; + FinalIndices.push_back(IntermedVals[i+1].second[j]); + } + + SDValue Shuffle; + if (Phase) + Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, + IntermedVals[i+1].first, + ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices)); + } + + // If we had an odd number of defined values, then append the last + // element to the array of new vectors. + if ((IntermedVals.size() & 1) != 0) + NewIntermedVals.push_back(IntermedVals.back()); + + IntermedVals.swap(NewIntermedVals); + } + + assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 && + "Invalid number of intermediate vectors"); + SDValue Vec1 = IntermedVals[0].first; + SDValue Vec2; + if (IntermedVals.size() > 1) + Vec2 = IntermedVals[1].first; + else if (Phase) + Vec2 = DAG.getUNDEF(VT); + + SmallVector<int, 16> ShuffleVec(NumElems, -1); + for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[0].second[i]] = i; + for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; + + if (Phase) + Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + } + + return true; +} /// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't /// support the operation, but do support the resultant vector type. @@ -1850,25 +1982,38 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { false, false, false, Alignment); } - if (!MoreThanTwoValues) { - SmallVector<int, 8> ShuffleVec(NumElems, -1); - for (unsigned i = 0; i < NumElems; ++i) { - SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - ShuffleVec[i] = V == Value1 ? 0 : NumElems; - } - if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { - // Get the splatted value into the low element of a vector register. - SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); - SDValue Vec2; - if (Value2.getNode()) - Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); - else - Vec2 = DAG.getUNDEF(VT); + SmallSet<SDValue, 16> DefinedValues; + for (unsigned i = 0; i < NumElems; ++i) { + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + DefinedValues.insert(Node->getOperand(i)); + } - // Return shuffle(LowValVec, undef, <0,0,0,0>) - return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) { + if (!MoreThanTwoValues) { + SmallVector<int, 8> ShuffleVec(NumElems, -1); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + ShuffleVec[i] = V == Value1 ? 0 : NumElems; + } + if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. 
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); + SDValue Vec2; + if (Value2.getNode()) + Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); + else + Vec2 = DAG.getUNDEF(VT); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + } + } else { + SDValue Res; + if (ExpandBVWithShuffles(Node, DAG, TLI, Res)) + return Res; } } @@ -2868,6 +3013,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(1), Zero, Zero, cast<AtomicSDNode>(Node)->getMemOperand(), cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getOrdering(), cast<AtomicSDNode>(Node)->getSynchScope()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); @@ -3099,7 +3245,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); // BUILD_VECTOR operands are allowed to be wider than the element type. - // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it + // But if NewEltVT is smaller than EltVT the BUILD_VECTOR does not accept + // it. if (NewEltVT.bitsLT(EltVT)) { // Convert shuffle node. @@ -3107,8 +3254,9 @@ // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. - EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, - VT.getSizeInBits()/NewEltVT.getSizeInBits()); + EVT NewVT = + EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits() / NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); // cast operands to new VT @@ -3116,7 +3264,8 @@ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); // Convert the shuffle mask - unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + unsigned int factor = + NewVT.getVectorNumElements()/VT.getVectorNumElements(); // EltVT gets smaller assert(factor > 0); @@ -3782,8 +3931,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, - Tmp3, Tmp4, CC); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, + Tmp2, Tmp3, Tmp4, CC); } } Results.push_back(Tmp1); @@ -3813,8 +3962,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, - Tmp3, Node->getOperand(4)); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, + Tmp2, Tmp3, Node->getOperand(4)); } Results.push_back(Tmp1); break; @@ -3976,7 +4125,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } case ISD::SELECT: { unsigned ExtOp, TruncOp; - if (Node->getValueType(0).isVector()) { + if (Node->getValueType(0).isVector() || + Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) { ExtOp = ISD::BITCAST; TruncOp = ISD::BITCAST; } else if (Node->getValueType(0).isInteger()) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 4255948ea1..18b2376b8b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -193,10 +193,10 @@ SDValue
DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), - N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, Op3, N->getMemOperand(), N->getOrdering(), - N->getSynchScope()); + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), + N->getChain(), N->getBasePtr(), Op2, Op3, + N->getMemOperand(), N->getSuccessOrdering(), + N->getFailureOrdering(), N->getSynchScope()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -2448,6 +2448,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(), cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getOrdering(), cast<AtomicSDNode>(N)->getSynchScope()); ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); ReplaceValueWith(SDValue(N, 1), Swap.getValue(1)); @@ -2577,13 +2578,17 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); SDValue Tmp1, Tmp2; - Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), - LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); + if (TLI.isTypeLegal(LHSLo.getValueType()) && + TLI.isTypeLegal(RHSLo.getValueType())) + Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); if (!Tmp1.getNode()) Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC); - Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); + if (TLI.isTypeLegal(LHSHi.getValueType()) && + TLI.isTypeLegal(RHSHi.getValueType())) + Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); if (!Tmp2.getNode()) Tmp2 = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()), diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index eb132304ef..e141883744 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -634,7 +634,7 @@ namespace { : SelectionDAG::DAGUpdateListener(dtl.getDAG()), DTL(dtl), NodesToAnalyze(nta) {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && N->getNodeId() != DAGTypeLegalizer::Processed && "Invalid node ID for RAUW deletion!"); @@ -655,7 +655,7 @@ namespace { NodesToAnalyze.insert(E); } - virtual void NodeUpdated(SDNode *N) { + void NodeUpdated(SDNode *N) override { // Node updates can mean pretty much anything. It is possible that an // operand was set to something already processed (f.e.) in which case // this node could become ready. Recompute its flags. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 13bb08f08c..947ea10fd7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -541,6 +541,7 @@ private: SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// // Vector Splitting Support: LegalizeVectorTypes.cpp @@ -670,13 +671,13 @@ private: LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of - /// stores to store a widen vector into non widen memory + /// stores to store a widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of - /// stores to store a truncate widen vector into non widen memory + /// stores to store a truncate widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c749fdea9f..e9424f2cde 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -78,8 +78,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); - llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); @@ -518,7 +518,7 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Cond, CL, CH); else - llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl); + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); @@ -540,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 2c3cdccb56..551d0549c8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -77,6 +77,10 @@ class VectorLegalizer { // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input // operand to the next size up. SDValue PromoteVectorOpINT_TO_FP(SDValue Op); + // Implements FP_TO_[SU]INT vector promotion of the result type; it is + // promoted to the next size up integer type. 
The result is then truncated + // back to the original type. + SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned); public: bool Run(); @@ -88,7 +92,7 @@ bool VectorLegalizer::Run() { // Before we start legalizing vector nodes, check if there are any vectors. bool HasVectors = false; for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) { + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { // Check if the values of the nodes contain vectors. We don't need to check // the operands because we are going to check their values at some point. for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); @@ -112,7 +116,7 @@ bool VectorLegalizer::Run() { // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) LegalizeOp(SDValue(I, 0)); // Finally, it's possible the root changed. Get the new root. @@ -210,6 +214,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: @@ -273,6 +278,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Result = PromoteVectorOpINT_TO_FP(Op); Changed = true; break; + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + // Promote the operation by extending the operand. + Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); + Changed = true; + break; } break; case TargetLowering::Legal: break; @@ -351,14 +362,9 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { // // Increase the bitwidth of the element to the next pow-of-two // (which is greater than 8 bits). - unsigned NumElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits()); - assert(EltVT.isSimple() && "Promoting to a non-simple vector type!"); - - // Build a new vector type and check if it is legal. - MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NVT.isSimple() && "Promoting to a non-simple vector type!"); SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); @@ -375,6 +381,35 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { Operands.size()); } +// For FP_TO_INT we promote the result type to a vector type with wider +// elements and then truncate the result. This is different from the default +// PromoteVector which uses bitcast to promote thus assuming that the +// promoted vector type has the same overall size.
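To make the transform concrete (example types assumed here, not taken from the patch): if FP_TO_SINT from v4f32 to v4i16 is illegal but the v4i32 form is legal, the promotion below emits the wide conversion first and then truncates each lane back down. A condensed sketch in the style of the surrounding code:

    // Sketch only; Src and the EVTs are assumed for illustration.
    SDValue Src = Op.getOperand(0);        // the <4 x float> input
    EVT VT = MVT::v4i16;                   // requested, illegal result type
    EVT NVT = MVT::v4i32;                  // first wider integer vector with a
                                           // legal or custom FP_TO_SINT
    SDValue Wide = DAG.getNode(ISD::FP_TO_SINT, SDLoc(Op), NVT, Src);
    SDValue Res = DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, Wide);

Unlike the bitcast-based default, the lane count is preserved while each lane narrows, so the promoted vector is genuinely larger than the requested type.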
+SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) { + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + EVT VT = Op.getValueType(); + + EVT NewVT; + unsigned NewOpc; + while (1) { + NewVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NewVT.isSimple() && "Promoting to a non-simple vector type!"); + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) { + NewOpc = ISD::FP_TO_SINT; + break; + } + if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) { + NewOpc = ISD::FP_TO_UINT; + break; + } + } + + SDLoc loc(Op); + SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted); +} + SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDLoc dl(Op); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f7a3e3d250..940a9c9059 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -65,6 +65,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::ANY_EXTEND: + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: @@ -384,6 +385,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::FP_ROUND: + Res = ScalarizeVecOp_FP_ROUND(N, OpNo); + break; } } @@ -467,6 +471,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getOriginalAlignment(), N->getTBAAInfo()); } +/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0).getVectorElementType(), Elt, + N->getOperand(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} //===----------------------------------------------------------------------===// // Result Vector Splitting @@ -521,6 +534,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::BSWAP: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: @@ -624,7 +638,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // We know the result is a vector. The input may be either a vector or a // scalar value. 
EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SDLoc dl(N); SDValue InOp = N->getOperand(0); @@ -679,7 +693,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); @@ -700,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, } EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); @@ -716,7 +730,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); @@ -740,7 +754,7 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, @@ -804,7 +818,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } @@ -814,7 +828,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(LD); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); @@ -828,7 +842,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT LoMemVT, HiMemVT; - llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, @@ -859,12 +873,12 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc DL(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the input. 
SDValue LL, LH, RL, RH; - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -875,7 +889,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // Get the dest types - they may not match the input types, e.g. int_to_fp. EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. @@ -883,7 +897,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), Lo, Hi); else - llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); @@ -912,7 +926,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, EVT SrcVT = N->getOperand(0).getValueType(); EVT DestVT = N->getValueType(0); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); // We can do better than a generic split operation if the extend is doing // more than just doubling the width of the elements and the following are @@ -938,7 +952,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, EVT SplitSrcVT = EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); EVT SplitLoVT, SplitHiVT; - llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { DEBUG(dbgs() << "Split vector extend via incremental extend:"; @@ -947,7 +961,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue NewSrc = DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); // Get the low and high halves of the new, extended one step, vector. - llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); // Extend those vector halves the rest of the way. 
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -1160,13 +1174,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { "Lo and Hi have differing types"); EVT LoOpVT, HiOpVT; - llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; - llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); - llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); - llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); SDValue LoSelect = DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); @@ -1281,7 +1295,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { GetSplitVector(N->getOperand(1), Lo, Hi); EVT LoMemVT, HiMemVT; - llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; @@ -1370,7 +1384,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // Extract the halves of the input via extract_subvector. SDValue InLoVec, InHiVec; - llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); + std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -2180,6 +2194,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) Cond1 = GetWidenedVector(Cond1); + // If we have to split the condition there is no point in widening the + // select. This would result in a cycle of widening the select -> + // widening the condition operand -> splitting the condition operand -> + // splitting the select -> widening the select. Instead split this select + // further and widen the resulting type.
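The loop the comment describes is worth spelling out; this trace is a sketch of the re-queueing that would happen without the guard that follows (abstract, since the exact types involved are target-dependent):

    // WidenVecRes_SELECT(N)          needs the mask in the widened type
    //   -> mask action is TypeSplitVector, so the mask gets split
    //   -> splitting the mask forces the select itself to be split
    //   -> SplitVecOp_VSELECT(N) produces a result that must be widened
    //   -> WidenVecRes_SELECT(N) again, and so on without terminating

The guard breaks the loop by doing the split exactly once and then widening only the final, already-legalized result.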
+ if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) { + SDValue SplitSelect = SplitVecOp_VSELECT(N, 0); + SDValue Res = ModifyToType(SplitSelect, WidenVT); + return Res; + } + if (Cond1.getValueType() != CondWidenVT) Cond1 = ModifyToType(Cond1, CondWidenVT); } @@ -2251,7 +2276,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { SDValue InOp1 = N->getOperand(0); EVT InVT = InOp1.getValueType(); - assert(InVT.isVector() && "can not widen non vector type"); + assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 1dd2128b8b..3b3424dfe0 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -603,7 +603,7 @@ SUnit *ResourcePriorityQueue::pop() { std::vector<SUnit *>::iterator Best = Queue.begin(); if (!DisableDFASched) { signed BestCost = SUSchedulingCost(*Best); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { if (SUSchedulingCost(*I) > BestCost) { @@ -614,14 +614,14 @@ SUnit *ResourcePriorityQueue::pop() { } // Use default TD scheduling mechanism. else { - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; } SUnit *V = *Best; - if (Best != prior(Queue.end())) + if (Best != std::prev(Queue.end())) std::swap(*Best, Queue.back()); Queue.pop_back(); @@ -633,7 +633,7 @@ SUnit *ResourcePriorityQueue::pop() { void ResourcePriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 4af7172847..b62bd623c4 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -15,8 +15,8 @@ #define LLVM_CODEGEN_SDNODEDBGVALUE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/DebugLoc.h" namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 6c5e0ab8b2..0687392a34 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -80,7 +80,7 @@ public: ScheduleDAGFast(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} - void Schedule(); + void Schedule() override; /// AddPred - adds a predecessor edge to SUnit SU. /// This returns true if this is a new predecessor. @@ -107,7 +107,7 @@ private: void ListScheduleBottomUp(); /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. 
- bool forceUnitLatencies() const { return true; } + bool forceUnitLatencies() const override { return true; } }; } // end anonymous namespace @@ -646,9 +646,10 @@ class ScheduleDAGLinearize : public ScheduleDAGSDNodes { public: ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} - void Schedule(); + void Schedule() override; - MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + MachineBasicBlock * + EmitSchedule(MachineBasicBlock::iterator &InsertPos) override; private: std::vector<SDNode*> Sequence; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 1a562d74b4..c28366456a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -177,7 +177,7 @@ public: delete AvailableQueue; } - void Schedule(); + void Schedule() override; ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } @@ -261,7 +261,7 @@ private: /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. - bool forceUnitLatencies() const { + bool forceUnitLatencies() const override { return !NeedLatency; } }; @@ -1539,7 +1539,6 @@ template<class SF> struct reverse_sort : public queue_sort { SF &SortFunc; reverse_sort(SF &sf) : SortFunc(sf) {} - reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} bool operator()(SUnit* left, SUnit* right) const { // reverse left/right rather than simply !SortFunc(left, right) @@ -1559,7 +1558,6 @@ struct bu_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1574,8 +1572,6 @@ struct src_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - src_ls_rr_sort(const src_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1590,8 +1586,6 @@ struct hybrid_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1609,8 +1603,6 @@ struct ilp_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1675,13 +1667,13 @@ public: return scheduleDAG->getHazardRec(); } - void initNodes(std::vector<SUnit> &sunits); + void initNodes(std::vector<SUnit> &sunits) override; - void addNode(const SUnit *SU); + void addNode(const SUnit *SU) override; - void updateNode(const SUnit *SU); + void updateNode(const SUnit *SU) override; - void releaseState() { + void releaseState() override { SUnits = 0; SethiUllmanNumbers.clear(); std::fill(RegPressure.begin(), RegPressure.end(), 0); @@ -1695,26 +1687,26 @@ public: return SU->getNode()->getIROrder(); } - bool empty() const { return Queue.empty(); } + bool empty() const override { return Queue.empty(); } - void push(SUnit *U) { + void push(SUnit *U) override { assert(!U->NodeQueueId && "Node in the queue already"); U->NodeQueueId = ++CurQueueId; Queue.push_back(U); } - void remove(SUnit *SU) { + void remove(SUnit *SU) override { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 
0 && "Not in queue!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); SU->NodeQueueId = 0; } - bool tracksRegPressure() const { return TracksRegPressure; } + bool tracksRegPressure() const override { return TracksRegPressure; } void dumpRegPressure() const; @@ -1724,9 +1716,9 @@ public: int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void scheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU) override; - void unscheduledNode(SUnit *SU); + void unscheduledNode(SUnit *SU) override; protected: bool canClobber(const SUnit *SU, const SUnit *Op); @@ -1738,12 +1730,12 @@ protected: template<class SF> static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Q.begin()), E = Q.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; SUnit *V = *Best; - if (Best != prior(Q.end())) + if (Best != std::prev(Q.end())) std::swap(*Best, Q.back()); Q.pop_back(); return V; @@ -1776,13 +1768,13 @@ public: tii, tri, tli), Picker(this) {} - bool isBottomUp() const { return SF::IsBottomUp; } + bool isBottomUp() const override { return SF::IsBottomUp; } - bool isReady(SUnit *U) const { + bool isReady(SUnit *U) const override { return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); } - SUnit *pop() { + SUnit *pop() override { if (Queue.empty()) return NULL; SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 054e3dd840..5639894d09 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -738,13 +738,13 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || // Fast-isel may have inserted some instructions, in which case the // BB->back().isPHI() test will not fire when we want it to. - prior(Emitter.getInsertPos())->isPHI()) { + std::prev(Emitter.getInsertPos())->isPHI()) { // Did not insert any instruction. 
Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; } - Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos()))); ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2ff37e0a15..5e11dbb5fb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -117,13 +117,13 @@ namespace llvm { virtual MachineBasicBlock* EmitSchedule(MachineBasicBlock::iterator &InsertPos); - virtual void dumpNode(const SUnit *SU) const; + void dumpNode(const SUnit *SU) const override; void dumpSchedule() const; - virtual std::string getGraphNodeLabel(const SUnit *SU) const; + std::string getGraphNodeLabel(const SUnit *SU) const override; - virtual std::string getDAGName() const; + std::string getDAGName() const override; virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 58aa1fe0eb..fb861030de 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -80,7 +80,7 @@ public: delete AvailableQueue; } - void Schedule(); + void Schedule() override; private: void releaseSucc(SUnit *SU, const SDep &D); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45d5a4fa69..d11ce80424 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -18,17 +18,15 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" @@ -179,6 +177,22 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { return true; } +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantSDNode or undef. +bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantSDNode>(Op)) + return false; + } + return true; +} + /// isScalarToVector - Return true if the specified node is a /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low /// element is not an undef. 
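Since undef lanes count as constants for the new isBuildVectorOfConstantSDNodes predicate above, a typical call site can gate a lane-wise fold on it and then read each operand directly. A hypothetical sketch (this combine is not part of the patch):

    // Fold only when every defined lane of N1 is a compile-time constant.
    if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
      for (unsigned i = 0, e = N1.getNumOperands(); i != e; ++i) {
        SDValue Lane = N1.getOperand(i);
        if (Lane.getOpcode() == ISD::UNDEF)
          continue;                              // undef lanes are permitted
        const APInt &C = cast<ConstantSDNode>(Lane)->getAPIntValue();
        // ... use the lane constant C ...
        (void)C;
      }
    }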
@@ -217,6 +231,21 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } +ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) { + switch (ExtType) { + case ISD::EXTLOAD: + return ISD::ANY_EXTEND; + case ISD::SEXTLOAD: + return ISD::SIGN_EXTEND; + case ISD::ZEXTLOAD: + return ISD::ZERO_EXTEND; + default: + break; + } + + llvm_unreachable("Invalid LoadExtType"); +} + /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) /// when given the operation for (X op Y). ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { @@ -369,9 +398,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { llvm_unreachable("Should only be used on nodes with operands"); default: break; // Normal nodes don't need extra info. case ISD::TargetConstant: - case ISD::Constant: - ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); + case ISD::Constant: { + const ConstantSDNode *C = cast<ConstantSDNode>(N); + ID.AddPointer(C->getConstantIntValue()); + ID.AddBoolean(C->isOpaque()); break; + } case ISD::TargetConstantFP: case ISD::ConstantFP: { ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); @@ -869,7 +901,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL), + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(0), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), UpdateListeners(0) { @@ -877,10 +909,8 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti, - const TargetLowering *tli) { +void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) { MF = &mf; - TTI = tti; TLI = tli; Context = &mf.getFunction()->getContext(); } @@ -956,19 +986,21 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, NegOne); } -SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); - return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO); } -SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { - return getConstant(*ConstantInt::get(*Context, Val), VT, isT); +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO) +{ + return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, + bool isO) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); @@ -1010,7 +1042,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) .trunc(ViaEltSizeInBits), - ViaEltVT, isT)); + ViaEltVT, isT, 
isO)); } // EltParts is currently in little endian order. If we actually want @@ -1041,6 +1073,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); ID.AddPointer(Elt); + ID.AddBoolean(isO); void *IP = 0; SDNode *N = NULL; if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT); + N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); } @@ -1139,7 +1172,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, if (!GVar) { // If GV is an alias then use the aliasee for determining thread-localness. if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasedGlobal()); } unsigned Opc; @@ -2502,17 +2535,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue Operand) { - // Constant fold unary operations with an integer constant operand. + // Constant fold unary operations with an integer constant operand. Even + // opaque constants will be folded, because the folding of unary operations + // doesn't create new constants with different values. Nevertheless, the + // opaque flag is preserved during folding to prevent future folding with + // other constants. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { const APInt &Val = C->getAPIntValue(); switch (Opcode) { default: break; case ISD::SIGN_EXTEND: - return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT, + C->isTargetOpcode(), C->isOpaque()); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: - return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT, + C->isTargetOpcode(), C->isOpaque()); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { APFloat apf(EVTToAPFloatSemantics(VT), @@ -2529,15 +2568,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); break; case ISD::BSWAP: - return getConstant(Val.byteSwap(), VT); + return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::CTPOP: - return getConstant(Val.countPopulation(), VT); + return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return getConstant(Val.countLeadingZeros(), VT); + return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return getConstant(Val.countTrailingZeros(), VT); + return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(), + C->isOpaque()); } } @@ -2774,10 +2817,13 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); - if (Scalar1 && Scalar2) { + if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque())) + return SDValue(); + + if (Scalar1 && Scalar2) // Scalar instruction.
Inputs.push_back(std::make_pair(Scalar1, Scalar2)); - } else { + else { // For vectors extract each constant element into Inputs so we can constant // fold them individually. BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); @@ -2793,6 +2839,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, if (!V1 || !V2) // Not a constant, bail. return SDValue(); + if (V1->isOpaque() || V2->isOpaque()) + return SDValue(); + // Avoid BUILD_VECTOR nodes that perform implicit truncation. // FIXME: This is valid and could be handled by truncating the APInts. if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) @@ -3546,10 +3595,10 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } - // If the "cost" of materializing the integer immediate is 1 or free, then - // it is cost effective to turn the load into the immediate. - const TargetTransformInfo *TTI = DAG.getTargetTransformInfo(); - if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2) + // If the "cost" of materializing the integer immediate is less than the cost + // of a load, then it is cost effective to turn the load into the immediate. + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) return DAG.getConstant(Val, VT); return SDValue(0, 0); } @@ -3609,8 +3658,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() || - TLI.allowsUnalignedMemoryAccesses(VT)) { + unsigned AS = 0; + if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || + TLI.allowsUnalignedMemoryAccesses(VT, AS)) { VT = TLI.getPointerTy(); } else { switch (DstAlign & 7) { @@ -3667,9 +3717,10 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // FIXME: Only does this for 64-bit or more since we don't have proper // cost model for unaligned load / store. 
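The shouldConvertConstantLoadToIntImm hook used above moves the load-versus-immediate decision from TargetTransformInfo into the target lowering. A minimal sketch of an override (MyTargetLowering is hypothetical; the signature matches the hook as called above):

    // Accept the load -> immediate rewrite only for values the ISA can
    // materialize cheaply, e.g. with one or two move-immediate instructions.
    bool MyTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                             Type *Ty) const {
      return Imm.getActiveBits() <= 16 || Imm.isSignedIntN(32);
    }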
bool Fast; + unsigned AS = 0; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast) + TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast) VTSize = Size; else { VT = NewVT; @@ -4182,9 +4233,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDVTList VTList, SDValue* Ops, unsigned NumOps, + SDVTList VTList, SDValue *Ops, unsigned NumOps, MachineMemOperand *MMO, - AtomicOrdering Ordering, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { FoldingSetNodeID ID; ID.AddInteger(MemVT.getRawBits()); @@ -4206,17 +4258,28 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, MemVT, Ops, DynOps, NumOps, MMO, - Ordering, SynchScope); + SuccessOrdering, FailureOrdering, + SynchScope); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, SDValue *Ops, unsigned NumOps, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + return getAtomic(Opcode, dl, MemVT, VTList, Ops, NumOps, MMO, Ordering, + Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, - AtomicOrdering Ordering, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); @@ -4237,14 +4300,15 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO, - Ordering, SynchScope); + SuccessOrdering, FailureOrdering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachineMemOperand *MMO, - AtomicOrdering Ordering, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); @@ -4253,7 +4317,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, SuccessOrdering, + FailureOrdering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -5633,7 +5698,7 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { SDNode::use_iterator &UI; SDNode::use_iterator &UE; - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { // Increment the iterator as needed. 
while (UI != UE && N == *UI) ++UI; @@ -6457,7 +6522,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits, - bool isBigEndian) { + bool isBigEndian) const { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); @@ -6518,6 +6583,27 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, return true; } +ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const { + SDValue Op0 = getOperand(0); + if (Op0.getOpcode() != ISD::Constant) + return nullptr; + + for (unsigned i = 1, e = getNumOperands(); i != e; ++i) + if (getOperand(i) != Op0) + return nullptr; + + return cast<ConstantSDNode>(Op0); +} + +bool BuildVectorSDNode::isConstant() const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + unsigned Opc = getOperand(i).getOpcode(); + if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP) + return false; + } + return true; +} + bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { // Find the first non-undef value in the shuffle mask. unsigned i, e; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2b2713d248..4a6e5cf036 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -34,10 +34,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -214,6 +214,20 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, llvm_unreachable("Unknown mismatch!"); } +static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, + const Twine &ErrMsg) { + const Instruction *I = dyn_cast_or_null<Instruction>(V); + if (!V) + return Ctx.emitError(ErrMsg); + + const char *AsmError = ", possible invalid constraint for vector type"; + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + return Ctx.emitError(I, ErrMsg + AsmError); + + return Ctx.emitError(I, ErrMsg); +} + /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. 
If the parts combine to a /// type larger then ValueVT then AssertOp can be used to specify whether the @@ -306,16 +320,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("non-trivial scalar-to-vector conversion"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "non-trivial scalar-to-vector conversion"); return DAG.getUNDEF(ValueVT); } @@ -397,18 +403,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - if (PartEVT != ValueVT) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("scalar-to-vector conversion failed"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } - } + if (PartEVT != ValueVT) + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "scalar-to-vector conversion failed"); Parts[0] = Val; return; @@ -627,16 +624,6 @@ namespace { } } - /// areValueTypesLegal - Return true if types of all the values are legal. - bool areValueTypesLegal(const TargetLowering &TLI) { - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT RegisterVT = RegVTs[Value]; - if (!TLI.isTypeLegal(RegisterVT)) - return false; - } - return true; - } - /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); @@ -851,12 +838,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); - Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + unsigned TheReg = Regs[Reg++]; + Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); + + if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { + // If we clobbered the stack pointer, MFI should know about it. 
+ assert(DAG.getMachineFunction().getFrameInfo()-> + hasInlineAsmWithSPAdjust()); + } } } } @@ -866,7 +861,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - TD = DAG.getTarget().getDataLayout(); + DL = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -884,6 +879,7 @@ void SelectionDAGBuilder::clear() { PendingExports.clear(); CurInst = NULL; HasTailCall = false; + SDNodeOrder = LowestSDNodeOrder; } /// clearDanglingDebugInfo - Clear the dangling debug information @@ -1384,7 +1380,9 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock *SwitchBB, + uint32_t TWeight, + uint32_t FWeight) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1409,7 +1407,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, } CaseBlock CB(Condition, BOp->getOperand(0), - BOp->getOperand(1), NULL, TBB, FBB, CurBB); + BOp->getOperand(1), NULL, TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); return; } @@ -1417,17 +1415,26 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - NULL, TBB, FBB, CurBB); + NULL, TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); } +/// Scale down both weights to fit into uint32_t. +static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { + uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; + uint32_t Scale = (NewMax / UINT32_MAX) + 1; + NewTrue = NewTrue / Scale; + NewFalse = NewFalse / Scale; +} + /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc) { + unsigned Opc, uint32_t TWeight, + uint32_t FWeight) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast<Instruction>(Cond); if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || @@ -1435,7 +1442,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { - EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, + TWeight, FWeight); return; } @@ -1447,6 +1455,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, if (Opc == Instruction::Or) { // Codegen X | Y as: + // BB1: // jmp_if_X TBB // jmp TmpBB // TmpBB: @@ -1454,14 +1463,34 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // jmp FBB // + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's + // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice + // assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals TrueProb for + // TmpBB, but the math is more complicated. + + uint64_t NewTrueWeight = TWeight; + uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = TWeight; + NewFalseWeight = 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: + // BB1: // jmp_if_X TmpBB // jmp FBB // TmpBB: @@ -1470,11 +1499,28 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // // This requires creation of TmpBB after CurBB. + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's + // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice + // assumes that + // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. + + uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; + uint64_t NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = 2 * (uint64_t)TWeight; + NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } } @@ -1525,8 +1571,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); - // If this is not a fall-through branch, emit the branch. - if (Succ0MBB != NextBlock) + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1561,7 +1608,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode()); + BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), + getEdgeWeight(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry.
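The weight arithmetic added above is easy to sanity-check outside of LLVM. A minimal standalone C++ sketch (not part of the patch; the weights A and B are hypothetical) verifying that giving BB1 the weights (A, A+2B) and TmpBB the weights (A, 2B) reproduces the original TrueProb of A/(A+B) in the Or case:

// Sanity check for the Or-case weight split: BB1 = (A, A+2B), TmpBB = (A, 2B).
// Standalone sketch, not LLVM code; A and B are hypothetical branch weights.
#include <cassert>
#include <cmath>

int main() {
  const double A = 3, B = 1;                       // original true/false weights
  const double TrueBB1  = A / (2*A + 2*B);         // P(BB1 -> TBB)
  const double FalseBB1 = (A + 2*B) / (2*A + 2*B); // P(BB1 -> TmpBB)
  const double TrueTmp  = A / (A + 2*B);           // P(TmpBB -> TBB)
  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
  // must equal TrueProb for the original block, A / (A + B).
  assert(std::fabs(TrueBB1 + FalseBB1 * TrueTmp - A / (A + B)) < 1e-12);
  // The stated assumption also holds: TrueProb for BB1 equals
  // FalseProb for BB1 * TrueProb for TmpBB (both are A / (2A + 2B)).
  assert(std::fabs(TrueBB1 - FalseBB1 * TrueTmp) < 1e-12);
  return 0;
}

ScaleWeights itself only guards against uint32_t overflow; since only the ratio of the two weights matters, dividing both by the same Scale preserves the probabilities up to rounding.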
@@ -2351,7 +2399,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, volatile double RDensity = (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); - double Metric = Range.logBase2()*(LDensity+RDensity); + volatile double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place DEBUG(dbgs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' @@ -2590,7 +2638,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); @@ -2936,6 +2984,13 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { if (DestVT != N.getValueType()) setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), DestVT, N)); // convert types. + // Check if the original LLVM IR operand was a ConstantInt, because getValue() + // might fold any kind of constant expression to an integer constant and that + // is not what we are looking for. Only recognize a bitcast of a genuine + // constant integer as an opaque constant. + else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) + setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + /*isOpaque*/true)); else setValue(&I, N); // noop cast. } @@ -3261,7 +3316,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, DAG.getConstant(Offset, N.getValueType())); } @@ -3275,7 +3330,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = - TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); @@ -3292,7 +3347,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Idx * ElementSize; APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), - TD->getTypeAllocSize(Ty)); + DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3370,9 +3425,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); - // Inform the Frame Information that we have just allocated a variable-sized - // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); + assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { @@ -3400,7 +3453,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile() || NumValues > MaxParallelChains) + if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects.
Root = getRoot(); else if (AA->pointsToConstantMemory( @@ -3413,6 +3466,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + const TargetLowering *TLI = TM.getTargetLowering(); + if (isVolatile) + Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), NumValues)); @@ -3536,14 +3593,15 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); - AtomicOrdering Order = I.getOrdering(); + AtomicOrdering SuccessOrder = I.getSuccessOrdering(); + AtomicOrdering FailureOrder = I.getFailureOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, DAG, *TLI); SDValue L = @@ -3554,13 +3612,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, + TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); SDValue OutChain = L.getValue(1); if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, DAG, *TLI); setValue(&I, L); @@ -3637,6 +3696,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); + InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), @@ -5283,7 +5343,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5324,6 +5384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { (void)getControlRoot(); return 0; } + case Intrinsic::clear_cache: + return TLI->getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore return 0; @@ -5366,6 +5428,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, int DemoteStackIdx = -100; if (!CanLowerReturn) { + assert(!CS.hasInAllocaArgument() && + "sret demotion is incompatible with inalloca"); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( FTy->getReturnType()); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( @@ -5508,9 +5572,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. 
static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + for (const User *U : V->users()) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) if (IC->isEquality()) if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) @@ -5534,7 +5597,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.TD)) + Builder.DL)) return Builder.getValue(LoadCst); } @@ -5653,9 +5716,13 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // bloat the code. const TargetLowering *TLI = TM.getTargetLowering(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } @@ -6026,7 +6093,7 @@ public: /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout *TD) const { + const DataLayout *DL) const { if (CallOperandVal == 0) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) @@ -6052,7 +6119,7 @@ public: // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = TD->getTypeSizeInBits(OpTy); + unsigned BitSize = DL->getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -6108,7 +6175,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, // types are identical size, use a bitcast to convert (e.g. two differing // vector types). MVT RegVT = *PhysReg.second->vt_begin(); - if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; @@ -6241,7 +6308,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL). getSimpleVT(); } @@ -6716,11 +6783,11 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering *TLI = TM.getTargetLowering(); - const DataLayout &TD = *TLI->getDataLayout(); + const DataLayout &DL = *TLI->getDataLayout(); SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), - TD.getABITypeAlignment(I.getType())); + DL.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -6781,6 +6848,42 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, return TLI->LowerCallTo(CLI); } +/// \brief Add a stack map intrinsic call's live variable operands to a stackmap +/// or patchpoint target node's operand list. 
+/// +/// Constants are converted to TargetConstants purely as an optimization to +/// avoid constant materialization and register allocation. +/// +/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not +/// generate address computation nodes, and so ExpandISelPseudo can convert the +/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids +/// address materialization and register allocation, but may also be required +/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an +/// alloca in the entry block, then the runtime may assume that the alloca's +/// StackMap location can be read immediately after compilation and that the +/// location is valid at any point during execution (this is similar to the +/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were +/// only available in a register, then the runtime would need to trap when +/// execution reaches the StackMap in order to read the alloca's location. +static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx, + SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder &Builder) { + for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = Builder.getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { + Ops.push_back( + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { + const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); + Ops.push_back( + Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + } else + Ops.push_back(OpVal); + } +} + /// \brief Lower llvm.experimental.stackmap directly to its target opcode. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, @@ -6788,61 +6891,64 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); - SDValue Callee = getValue(CI.getCalledValue()); + SDValue Chain, InFlag, Callee, NullPtr; + SmallVector<SDValue, 32> Ops; - // Lower into a call sequence with no args and no return value. - std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee); - // Set the root to the target-lowered call chain. - SDValue Chain = Result.second; - DAG.setRoot(Chain); + SDLoc DL = getCurSDLoc(); + Callee = getValue(CI.getCalledValue()); + NullPtr = DAG.getIntPtrConstant(0, true); - /// Get a call instruction from the call sequence chain. - /// Tail calls are not allowed. - SDNode *CallEnd = Chain.getNode(); - assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && - "Expected a callseq node."); - SDNode *Call = CallEnd->getOperand(0).getNode(); - bool hasGlue = Call->getGluedNode(); + // The stackmap intrinsic only records the live variables (the arguments + // passed to it) and emits NOPs (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target specific lowering code. + // Instead we perform the call lowering right here.
+ // + // chain, flag = CALLSEQ_START(chain, 0) + // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) + // chain, flag = CALLSEQ_END(chain, 0, 0, flag) + // + Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); + InFlag = Chain.getValue(1); - // Replace the target specific call node with the stackmap intrinsic. - SmallVector<SDValue, 8> Ops; + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); - // Add the <id> and <numShadowBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } // Push live variables for the stack map. - for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) - Ops.push_back(getValue(CI.getArgOperand(i))); + addStackMapLiveVars(CI, 2, Ops, *this); - // Push the chain (this is originally the first operand of the call, but - // becomes now the last or second to last operand). - Ops.push_back(*(Call->op_begin())); + // We are not pushing any register mask info here on the operands list, + // because the stackmap doesn't clobber anything. - // Push the glue flag (last operand). - if (hasGlue) - Ops.push_back(*(Call->op_end()-1)); + // Push the chain and the glue flag. + Ops.push_back(Chain); + Ops.push_back(InFlag); + // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); + Chain = SDValue(SM, 0); + InFlag = Chain.getValue(1); - // Replace the target specific call node with a STACKMAP node. - MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), - NodeTys, Ops); + Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); - // StackMap generates no value, so nothing goes in the NodeMap. + // Stackmaps don't generate values, so nothing goes into the NodeMap. - // Fixup the consumers of the intrinsic. The chain and glue may be used in the - // call sequence. - DAG.ReplaceAllUsesWith(Call, MN); + // Set the root to the target-lowered call chain. + DAG.setRoot(Chain); - DAG.DeleteNode(Call); + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo()->setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { - // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>, + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, // i32 <numArgs>, @@ -6855,17 +6961,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { SDValue Callee = getValue(CI.getOperand(2)); // <target> // Get the real number of arguments participating in the call <numArgs> - unsigned NumArgs = - cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue(); + SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> - assert(CI.getNumArgOperands() >= NumArgs + 4 && + // Intrinsics include all meta-operands up to but not including CC. 
+ unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; std::pair<SDValue, SDValue> Result = - LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC); // Set the root to the target-lowered call chain. SDValue Chain = Result.second; @@ -6885,13 +6993,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Replace the target specific call node with the patchable intrinsic. SmallVector<SDValue, 8> Ops; - // Add the <id> and <numNopBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + // Assume that the Callee is a constant address. + // FIXME: handle function symbols in the future. Ops.push_back( DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), /*isTarget=*/true)); @@ -6909,25 +7020,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Add the arguments we omitted previously. The register allocator should // place these in any free register. if (isAnyRegCC) - for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CI.getArgOperand(i))); - // Push the arguments from the call instruction. + // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) Ops.push_back(*i); // Push live variables for the stack map. - for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { - SDValue OpVal = getValue(CI.getArgOperand(i)); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { - Ops.push_back( - DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back( - DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); - } else - Ops.push_back(OpVal); - } + addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this); // Push the register mask info. if (hasGlue) @@ -6981,6 +7083,9 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { } else DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -7037,8 +7142,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); - if (Args[i].isByVal) { + if (Args[i].isByVal) Flags.setByVal(); + if (Args[i].isInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. 
If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); @@ -7202,12 +7317,10 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); - for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) { - const User *U = *UI; + for (const User *U : A->users()) if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) return false; // Use not in entry block. - } + return true; } @@ -7215,7 +7328,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); const TargetLowering *TLI = getTargetLowering(); - const DataLayout *TD = TLI->getDataLayout(); + const DataLayout *DL = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { @@ -7247,7 +7360,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = - TD->getABITypeAlignment(ArgTy); + DL->getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7257,11 +7370,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 835f643cc1..66835bf557 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,9 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> @@ -57,6 +56,7 @@ class MachineBasicBlock; class MachineInstr; class MachineRegisterInfo; class MDNode; +class MVT; class PHINode; class PtrToIntInst; class ReturnInst; @@ -488,8 +488,12 @@ private: private: const TargetMachine &TM; public: + /// Lowest valid SDNodeOrder. 
The special case 0 is reserved for scheduling + /// nodes without a corresponding SDNode. + static const unsigned LowestSDNodeOrder = 1; + SelectionDAG &DAG; - const DataLayout *TD; + const DataLayout *DL; AliasAnalysis *AA; const TargetLibraryInfo *LibInfo; @@ -534,7 +538,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()), + : CurInst(NULL), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } @@ -608,11 +612,13 @@ public: void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB, unsigned Opc); + MachineBasicBlock *SwitchBB, unsigned Opc, + uint32_t TW, uint32_t FW); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB); + MachineBasicBlock *SwitchBB, + uint32_t TW, uint32_t FW); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); @@ -627,7 +633,7 @@ public: bool useVoidTy = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the - /// references that ned to refer to the last resulting block. + /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); private: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c04a08d57f..535febae0f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -14,11 +14,10 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" @@ -82,7 +81,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; case ISD::RegisterMask: return "RegisterMask"; - case ISD::Constant: return "Constant"; + case ISD::Constant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueConstant"; + return "Constant"; case ISD::ConstantFP: return "ConstantFP"; case ISD::GlobalAddress: return "GlobalAddress"; case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; @@ -112,7 +114,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { } case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueTargetConstant"; + return "TargetConstant"; case ISD::TargetConstantFP: return "TargetConstantFP"; case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; @@ -352,7 +357,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), e = MN->memoperands_end(); i != e; ++i) { OS << **i; - if (llvm::next(i) != 
e) + if (std::next(i) != e) OS << " "; } OS << ">"; @@ -385,7 +390,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { dyn_cast<GlobalAddressSDNode>(this)) { int64_t offset = GADN->getOffset(); OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); + GADN->getGlobal()->printAsOperand(OS); OS << '>'; if (offset > 0) OS << " + " << offset; @@ -476,9 +481,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { dyn_cast<BlockAddressSDNode>(this)) { int64_t offset = BA->getOffset(); OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + BA->getBlockAddress()->getFunction()->printAsOperand(OS, false); OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false); OS << ">"; if (offset > 0) OS << " + " << offset; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3a0cfa16ae..5d0e2b937d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -33,8 +32,8 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" @@ -213,7 +212,7 @@ MachinePassRegistry RegisterScheduler::Registry; static cl::opt<RegisterScheduler::FunctionPassCtor, false, RegisterPassParser<RegisterScheduler> > ISHeuristic("pre-RA-sched", - cl::init(&createDefaultScheduler), + cl::init(&createDefaultScheduler), cl::Hidden, cl::desc("Instruction schedulers available (before register" " allocation):")); @@ -400,7 +399,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); LibInfo = &getAnalysis<TargetLibraryInfo>(); - TTI = getAnalysisIfAvailable<TargetTransformInfo>(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; TargetSubtargetInfo &ST = @@ -418,8 +416,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TTI, TLI); - FuncInfo->set(Fn, *MF); + CurDAG->init(*MF, TLI); + FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); @@ -428,7 +426,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SDB->init(GFI, *AA, LibInfo); - MF->setHasMSInlineAsm(false); + MF->setHasInlineAsm(false); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be @@ -448,7 +447,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; bool hasFI = MI->getOperand(0).isFI(); - unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); + unsigned Reg = + hasFI ? 
TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { @@ -456,7 +456,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (Def) { MachineBasicBlock::iterator InsertPos = Def; // FIXME: VR def may not be in entry block. - Def->getParent()->insert(llvm::next(InsertPos), MI); + Def->getParent()->insert(std::next(InsertPos), MI); } else DEBUG(dbgs() << "Dropping debug info for dead vreg" << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); @@ -483,9 +483,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // that COPY instructions also need DBG_VALUE, if it is the only // user of LDI->second. MachineInstr *CopyUseMI = NULL; - for (MachineRegisterInfo::use_iterator - UI = RegInfo->use_begin(LDI->second); - MachineInstr *UseMI = UI.skipInstruction();) { + for (MachineRegisterInfo::use_instr_iterator + UI = RegInfo->use_instr_begin(LDI->second), + E = RegInfo->use_instr_end(); UI != E; ) { + MachineInstr *UseMI = &*(UI++); if (UseMI->isDebugValue()) continue; if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { CopyUseMI = UseMI; continue; @@ -511,7 +512,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { - if (MFI->hasCalls() && MF->hasMSInlineAsm()) + if (MFI->hasCalls() && MF->hasInlineAsm()) break; const MachineBasicBlock *MBB = I; @@ -522,8 +523,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); } - if (II->isMSInlineAsm()) { - MF->setHasMSInlineAsm(true); + if (II->isInlineAsm()) { + MF->setHasInlineAsm(true); } } } @@ -563,6 +564,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // at this point. FuncInfo->clear(); + DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); + DEBUG(MF->print(dbgs())); + return true; } @@ -800,7 +804,7 @@ public: /// NodeDeleted - Handle nodes deleted from the graph. If the node being /// deleted is the current ISelPosition node, update ISelPosition. /// - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { if (ISelPosition == SelectionDAG::allnodes_iterator(N)) ++ISelPosition; } @@ -1063,7 +1067,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // where they are, so we can be sure to emit subsequent instructions // after them. if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) - FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt)); else FastIS->setLastLocalValue(0); } @@ -1071,7 +1075,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { - const Instruction *Inst = llvm::prior(BI); + const Instruction *Inst = std::prev(BI); // If we no longer require this instruction, skip it. if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { @@ -1092,7 +1096,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to fold the load if so. 
const Instruction *BeforeInst = Inst; while (BeforeInst != Begin) { - BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst)); if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) break; } @@ -1100,7 +1104,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { BeforeInst->hasOneUse() && FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. - BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + BI = std::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; ++NumFastIselSuccess; } @@ -2194,8 +2198,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI, - unsigned ChildNo) { + SDValue N, const TargetLowering *TLI, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); @@ -2231,6 +2234,14 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2313,6 +2324,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); return Index; + case SelectionDAGISel::OPC_CheckChild0Integer: + case SelectionDAGISel::OPC_CheckChild1Integer: + case SelectionDAGISel::OPC_CheckChild2Integer: + case SelectionDAGISel::OPC_CheckChild3Integer: + case SelectionDAGISel::OPC_CheckChild4Integer: + Result = !::CheckChildInteger(Table, Index, N, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer); + return Index; case SelectionDAGISel::OPC_CheckAndImm: Result = !::CheckAndImm(Table, Index, N, SDISel); return Index; @@ -2693,6 +2712,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; continue; + case OPC_CheckChild0Integer: case OPC_CheckChild1Integer: + case OPC_CheckChild2Integer: case OPC_CheckChild3Integer: + case OPC_CheckChild4Integer: + if (!::CheckChildInteger(MatcherTable, MatcherIndex, N, + Opcode-OPC_CheckChild0Integer)) break; + continue; case OPC_CheckAndImm: if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index b752b482e3..1483fddfdd 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -15,12 +15,11 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" 
#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 82b068d25c..5de0b030c7 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/CommandLine.h" @@ -74,6 +75,7 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); Alignment = CS->getParamAlignment(AttrIdx); } @@ -1115,6 +1117,54 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { (KnownOne.countPopulation() == 1); } +bool TargetLowering::isConstTrueVal(const SDNode *N) const { + if (!N) + return false; + + bool IsVec = false; + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + IsVec = true; + CN = BV->getConstantSplatValue(); + } + + switch (getBooleanContents(IsVec)) { + case UndefinedBooleanContent: + return CN->getAPIntValue()[0]; + case ZeroOrOneBooleanContent: + return CN->isOne(); + case ZeroOrNegativeOneBooleanContent: + return CN->isAllOnesValue(); + } + + llvm_unreachable("Invalid boolean contents"); +} + +bool TargetLowering::isConstFalseVal(const SDNode *N) const { + if (!N) + return false; + + bool IsVec = false; + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + IsVec = true; + CN = BV->getConstantSplatValue(); + } + + if (getBooleanContents(IsVec) == UndefinedBooleanContent) + return !CN->getAPIntValue()[0]; + + return CN->isNullValue(); +} + /// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. SDValue @@ -1468,18 +1518,32 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true - // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1-1, N1.getValueType()), - (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, N1.getValueType()), + NewCC); + } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true - // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1+1, N1.getValueType()), - (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT); + // X <= C0 --> X < (C0 + 1) + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, N1.getValueType()), + NewCC); + } } if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) @@ -1535,7 +1599,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. @@ -1565,7 +1629,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), @@ -1593,7 +1657,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC = NewC.lshr(ShiftBits); if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, @@ -2663,3 +2727,14 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); } } + +bool TargetLowering:: +verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { + if (!isa<ConstantSDNode>(Op.getOperand(0))) { + DAG.getContext()->emitError("argument to '__builtin_return_address' must " + "be a constant integer"); + return true; + } + + return false; +} diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index f769b44efb..1120be8ed2 100644 --- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -16,7 +16,7 @@ using namespace llvm; TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) - : TD(TM.getDataLayout()) { + : DL(TM.getDataLayout()) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
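The SETGE/SETLE canonicalizations extended above rest on two integer identities: X >= C is equivalent to X > C-1, and X <= C is equivalent to X < C+1, valid whenever C is not the minimum (resp. maximum) value of the type; the extreme cases fold to a constant earlier in SimplifySetCC. A minimal standalone C++ sketch (not LLVM code; the constant and range are arbitrary) exercising both identities:

// Checks the comparison rewrites used by the SETGE/SETLE canonicalization.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t C = 2; // any constant that is not INT32_MIN or INT32_MAX
  for (int32_t X = -8; X <= 8; ++X) {
    assert((X >= C) == (X > C - 1)); // X >= C0 --> X > (C0 - 1)
    assert((X <= C) == (X < C + 1)); // X <= C0 --> X < (C0 + 1)
  }
  // At the extremes no rewrite is needed: X >= INT32_MIN is always true,
  // which SimplifySetCC folds to the constant 1 instead.
  return 0;
}

Note that the patched code now also guards the rewrite: it fires only before legalize-ops or when the new condition code is legal for the type, and for opaque constants only when the adjusted immediate still fits a legal compare immediate.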